Fluorescence microscopy experiments

An Incucyte was used to image plates of cells transfected with various mCherry- or mScarlet-encoding constructs. Even-numbered columns had dox-induced TDP-43 knockdown.

Quantification was performed using CellProfiler.

Additionally, Nanopore sequencing was performed for some experiments, and this is also quantified.

Generate images from the first plate and the mScarlet control from the second plate

generate_full_image <- function(combined_positions, all_images, n_rows, 
                                n_columns=6, dim_image=200, spacing_images=5,
                                spacing_wells=30, background_quantile=0.97){
  
  # Tile Incucyte images into a single inverted overview image.
  #
  # Every well is drawn as a 2 x 2 block of images (replicate 1 top-left,
  # 2 top-right, 3 bottom-left, 4 bottom-right), so this only works when you
  # have four images per well (the "replicates").
  #
  # Arguments:
  #   combined_positions   data frame with columns `well`, `Plate` and `Order`
  #                        (`Order` is the output row of the well; wells whose
  #                        `Order` is NA or that are absent are skipped). If it
  #                        also has a `replicate` column, the replicate is
  #                        looked up by `filename`; otherwise it is parsed from
  #                        the third "_"-separated field of the file name.
  #   all_images           character vector of image paths. Paths containing
  #                        "control plate" are treated as plate 2, all others
  #                        as plate 1. The well is the second "_"-separated
  #                        field of the file name.
  #   n_rows, n_columns    number of well rows / columns in the output grid.
  #                        Plate columns wrap every `n_columns` (so with the
  #                        default, columns 1-6 and 7-12 share output columns).
  #   dim_image            side length (pixels) each image is resized to.
  #   spacing_images       gap between images within a well.
  #   spacing_wells        gap between neighbouring wells.
  #   background_quantile  quantile of the background-subtracted canvas used
  #                        as the normalisation (white point) value.
  #
  # Returns the object produced by Image() for the inverted, transposed canvas.
  
  well_size <- 2 * dim_image + spacing_images
  well_pitch <- well_size + spacing_wells
  sub_offset <- dim_image + spacing_images
  
  height <- n_rows * well_size + (n_rows - 1) * spacing_wells + 1
  width <- n_columns * well_size + (n_columns - 1) * spacing_wells + 1
  
  full_image <- matrix(0, nrow = height, ncol = width)
  
  # Smallest 5% quantile seen over all placed images
  background <- Inf
  n_placed <- 0
  
  for (image in all_images) {
    filename <- word(image, sep = "/", start = -1, end = -1)
    plate <- if (str_detect(image, "control plate")) "2" else "1"
    
    well <- word(filename, sep = "_", start = 2, end = 2)
    plate_column <- as.numeric(str_sub(well, 2, 3))
    
    # Wrap plate columns onto the output grid (1..n_columns)
    image_column <- ((plate_column - 1) %% n_columns) + 1
    
    image_row <- unique(combined_positions$Order[which(combined_positions$well == well & 
                                                         combined_positions$Plate == plate)])
    image_row <- image_row[!is.na(image_row)]
    
    # No (non-NA) row assigned to this well: leave it out of the figure
    if (length(image_row) == 0) {
      next
    }
    if (length(image_row) > 1) {
      warning("Multiple Order values for well ", well, " on plate ", plate,
              "; using the first", call. = FALSE)
      image_row <- image_row[1]
    }
    
    if ("replicate" %in% colnames(combined_positions)) {
      replicate <- combined_positions$replicate[which(combined_positions$filename == filename)]
    } else {
      replicate <- as.numeric(word(filename, sep = "_", start = 3, end = 3))
    }
    if (length(replicate) == 0) {
      warning("No replicate found for ", filename, "; skipping", call. = FALSE)
      next
    }
    replicate <- replicate[1]
    
    image_matrix <- readImage(image)
    smaller <- as.array(EBImage::resize(image_matrix, w = dim_image, h = dim_image))
    
    # Top-left corner of the well, then of this replicate within the well
    well_top_left_x <- (image_column - 1) * well_pitch + 1
    well_top_left_y <- (image_row - 1) * well_pitch + 1
    
    image_top_left_x <- well_top_left_x + if (replicate %in% c(2, 4)) sub_offset else 0
    image_top_left_y <- well_top_left_y + if (replicate %in% c(3, 4)) sub_offset else 0
    
    full_image[image_top_left_y:(image_top_left_y + dim_image - 1),
               image_top_left_x:(image_top_left_x + dim_image - 1)] <- smaller
    
    background <- min(background, quantile(smaller, 0.05))
    n_placed <- n_placed + 1
  }
  
  if (n_placed == 0) {
    warning("generate_full_image: no images were placed; returning a blank canvas",
            call. = FALSE)
    return(Image(t(1 - full_image)))
  }
  
  # Subtract the common background, scale by the chosen quantile and invert
  full_image2 <- full_image - background
  Image(1 - t(full_image2 / quantile(full_image2, background_quantile)))
}

# Red channel images from 96 well plate - internal cryptics
images_96 <- Sys.glob(paste0(data_dir, "oscar 12_05_2022/96 well red/*.png"))
# Red channel images from second 96 well plate - AARS1 upstream and controls
images_24 <- Sys.glob(paste0(data_dir, "oscar 12_05_2022/control plate red/*.png"))
all_images <- c(images_96, images_24)

# Unique file names (last "/"-separated component of every path)
all_files <- unique(word(all_images, sep = "/", start = -1, end = -1))

# Parse the well from each file name and derive plate, column and the
# half-row label ("<row>1-6" / "<row>7-12") used as join key below
position_df <- data.frame(filename = all_files) %>%
  mutate(
    well = word(filename, sep = "_", 2, 2),
    Plate = ifelse(str_detect(filename, "96"), 1, 2),
    col = as.numeric(str_sub(well, 2, 3)),
    Position = paste0(str_sub(well, 1, 1), ifelse(col <= 6, "1-6", "7-12"))
  )

# Plate layout table; `row` stores each entry's line number
positions <- mutate(
  read_csv("small_data_files/Plate positions for 12_05_2022 incucyte.csv"),
  row = 1:n()
)
## Rows: 20 Columns: 5
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (3): Construct, Position, Type
## dbl (2): Plate, Order
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
combined_positions <- left_join(position_df, positions, by = c("Position", "Plate"))

# Build, show and save the 14-row montage
img <- generate_full_image(combined_positions, all_images, n_rows = 14)
display(img)

writeImage(img, "markdown_images/mScar/mScarlet_inverted_ordered.jpeg", quality = 90)

mCherry constructs

# Output row for each mCherry-experiment construct; every other construct
# gets NA and is therefore left out of the montage
mcherry_row_lookup <- c(R3 = 1, mCherry = 2, untransfected = 3)

combined_positions_mcherry <- combined_positions %>%
  select(-Order) %>%
  mutate(Order = unname(mcherry_row_lookup[as.character(Construct)]))

img <- generate_full_image(combined_positions_mcherry, all_images, n_rows = 3)
display(img)

writeImage(img, "markdown_images/mCherry/mCherry_inverted_ordered.jpeg", quality = 90)

rm(img)

Quantification of fluorescence of the above; first do mScarlet

# Load the per-object measurements exported by CellProfiler
objects_df <- read_csv(paste0(data_dir, "oscar 12_05_2022/cell_profiler_output/incucyte_12_05_2022_lower_thresholdIdentifyPrimaryObjects.csv"))
## Rows: 118819 Columns: 23
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (2): FileName_incucyte_12_05_2022, PathName_incucyte_12_05_2022
## dbl (21): ImageNumber, ObjectNumber, Intensity_IntegratedIntensityEdge_incuc...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Sum object intensities per image, annotate each image with its well, plate
# position, construct and treatment, then add the per-well mean and SD
integrated_df <- objects_df %>%
  group_by(FileName_incucyte_12_05_2022) %>%
  mutate(integrated = sum(Intensity_IntegratedIntensity_incucyte_12_05_2022)) %>%
  distinct(FileName_incucyte_12_05_2022, integrated) %>%
  mutate(
    well = word(FileName_incucyte_12_05_2022, 2, sep = "_"),
    row = str_sub(well, 1, 1),
    column = as.numeric(str_sub(well, 2, 3)),
    Plate = ifelse(str_detect(FileName_incucyte_12_05_2022, "96"), 1, 2),
    Position = ifelse(column <= 6, paste0(row, "1-6"), paste0(row, "7-12"))
  ) %>%
  left_join(positions, by = c("Position", "Plate")) %>%
  # Even columns are the knockdown wells
  mutate(Treatment = ifelse(column %% 2 == 0, "shTDP", "NT")) %>%
  group_by(well, Plate) %>%
  mutate(
    mean_red = mean(integrated),
    sd_red = sd(integrated)
  )

# Now calculate summaries for each condition (Construct x Treatment).
#
# `integrated_df` has one row per image, with the per-well mean repeated on
# every image of that well. Reduce to one row per well first so that each well
# contributes exactly once to the condition mean and SD (the same approach the
# SH-SY5Y and Raver analyses use), rather than once per image.
integrated_summary <- integrated_df %>%
  ungroup() %>%
  distinct(Construct, Treatment, Order, well, Plate, mean_red) %>%
  group_by(Construct, Treatment) %>%
  # Condition mean is floored at 0
  mutate(mean_condition = max(mean(mean_red), 0),
         sd_condition = sd(mean_red)) %>%
  select(Construct, mean_condition, sd_condition, Treatment, Order) %>%
  unique() %>%
  group_by(Construct) %>%
  # Look up the NT and shTDP means of each construct; a missing treatment
  # gives NA instead of a magic sentinel value
  mutate(mean_NT = mean_condition[match("NT", Treatment)],
         mean_dox = mean_condition[match("shTDP", Treatment)]) %>%
  mutate(log2FC = log2(mean_dox / mean_NT),
         diff_frac = (mean_dox - mean_NT) / mean_dox)

# Produce a plot specifically for the mScarlet ones

# Axis labels: Orders 1-13 keep their number, Order 14 is shown as "+ve".
# The factor levels fix the left-to-right order on the x-axis.
good_names_df <- data.frame(Order = c(1:14),
                            good_name = factor(c(1:13, "+ve"), levels = c(1:13, "+ve")))

# Per-image rows, restricted to positions that have an Order assigned
integrated_mscar <- integrated_df %>%
  filter(!is.na(Order)) %>%
  left_join(good_names_df)
## Joining with `by = join_by(Order)`
# Per-condition summaries for the same positions. `integrated_summary` is
# still grouped by Construct, so y_pos (height of the log2FC label) is
# computed per construct.
integrated_summary_mscar <- integrated_summary %>%
  filter(!is.na(Order)) %>%
  left_join(good_names_df) %>%
  mutate(y_pos = 2.4*max(mean_condition+sd_condition))
## Joining with `by = join_by(Order)`
# Bars: condition means; points and error bars: per-well mean +/- SD;
# text: log2 fold change (shTDP vs NT) above each construct.
# NOTE(review): on the log10 axis, zero or negative values (e.g.
# mean_red - sd_red <= 0) cannot be drawn - presumably the source of the
# NaN / infinite-value warnings printed below; confirm.
ggplot(integrated_mscar, aes(x = factor(good_name), y=mean_red, colour= Treatment, fill = Treatment)) +
  scale_y_log10() +
  geom_bar(data = integrated_summary_mscar, aes(x = factor(good_name), y = mean_condition), 
           position = position_dodge(width = 0.8),
           width = 0.8,
           stat="identity", alpha = 1) +
  geom_point(alpha = 0.2, position = position_dodge(width = 0.8), colour = "grey50") +
  geom_errorbar(aes(x = good_name, ymax = (mean_red + sd_red), ymin = (mean_red-sd_red)), 
                position = position_dodge(width = 0.8), width = 0.2, colour = "grey50") +
  geom_text(data = integrated_summary_mscar, aes(x = factor(good_name), y = y_pos, 
                                                 label = round(log2FC,1)), colour="grey20", size=3.5) +
  ggeasy::easy_rotate_x_labels() +
  ylab("Red fluorescence intensity") +
  xlab("") +
  ggpubr::theme_pubclean() +
  ggeasy::easy_remove_legend() +
  ggsci::scale_fill_npg() +
  ggsci::scale_colour_npg()
## Registered S3 methods overwritten by 'car':
##   method       from
##   hist.boot    FSA 
##   confint.boot FSA
## Warning in self$trans$transform(x): NaNs produced
## Warning: Transformation introduced infinite values in continuous y-axis

# Save the plot drawn above (ggsave defaults to the last plot)
ggsave("markdown_images/mScar/mScar intensities quantified and ordered.pdf", height = 8, width = 12, units="cm")
## Warning in self$trans$transform(x): NaNs produced

## Warning in self$trans$transform(x): Transformation introduced infinite values
## in continuous y-axis

Quantify mScarlet results in SHSY5Y cells

We also transfected our constructs into SHSY5Y cells to check that the effect wasn’t specific to SKNBE2 cells

# Per-object CellProfiler output for the SH-SY5Y plate
objects_df <- read_csv(paste0(data_dir, "SHSY5Y incucyte/output_lower_threshold/IdentifyPrimaryObjects.csv"))
## Rows: 237882 Columns: 23
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (2): FileName_incucyte_12_05_2022, PathName_incucyte_12_05_2022
## dbl (21): ImageNumber, ObjectNumber, Intensity_IntegratedIntensityEdge_incuc...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# File names of every image that was taken, whether or not objects were found
shsy5y_image_paths <- Sys.glob(paste0(data_dir, "SHSY5Y incucyte/as stored/orange/*.png"))
all_image_files <- word(shsy5y_image_paths, -1, sep = "/")

positions <- read_csv("small_data_files/Plate positions for 12_05_2022 incucyte.csv")
## Rows: 20 Columns: 5
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (3): Construct, Position, Type
## dbl (2): Plate, Order
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# One construct per plate row A-H; the x-axis label is "+ve" for the mScarlet
# control and the construct's Order number otherwise
sh_positions <- data.frame(
  row = LETTERS[1:8],
  Construct = c("mScarlet", "A10", "A11", "B5", "B11", "B12", "C2", "C3")
) %>%
  left_join(positions) %>%
  mutate(
    plasmid = factor(
      case_when(Construct == "mScarlet" ~ "+ve",
                TRUE ~ paste0(Order)),
      levels = c("+ve", "6", "7", "8", "9", "10", "11", "12")
    )
  )
## Joining with `by = join_by(Construct)`
# Total integrated intensity per image; the full join adds a row for every
# image file that has no objects in the CellProfiler output
integrated_df <- objects_df %>%
  group_by(FileName_incucyte_12_05_2022) %>%
  mutate(integrated = sum(Intensity_IntegratedIntensity_incucyte_12_05_2022)) %>%
  distinct(FileName_incucyte_12_05_2022, integrated) %>%
  full_join(data.frame(FileName_incucyte_12_05_2022 = all_image_files))
## Joining with `by = join_by(FileName_incucyte_12_05_2022)`
# Images without any object get an intensity of 0
integrated_df <- integrated_df %>%
  mutate(integrated = coalesce(integrated, 0))

# Keep columns 1-6 only, label treatment, add per-well mean/SD and construct
integrated_df2 <- integrated_df %>%
  mutate(
    well = word(FileName_incucyte_12_05_2022, 2, sep = "_"),
    row = str_sub(well, 1, 1),
    column = as.numeric(str_sub(well, 2, 3))
  ) %>%
  filter(column < 7) %>%
  #left_join(positions, by = c("Position", "Plate")) %>%
  mutate(Treatment = ifelse(column %% 2 == 0, "shTDP", "NT")) %>%
  group_by(well) %>%
  mutate(
    mean_red = mean(integrated),
    sd_red = sd(integrated)
  ) %>%
  left_join(sh_positions)
## Joining with `by = join_by(row)`
# Quick plate-layout heat map of the per-well mean intensity (sanity check)
ggplot(integrated_df2, aes(x = column, y = row, fill = mean_red)) +
  geom_tile()

# Collapse to one row per well (mean_red / sd_red are constant within a well),
# then average the well means per plate row and treatment and attach the
# construct information for that row.
summary_df <- integrated_df2 %>%
  ungroup() %>%
  select(row, mean_red, sd_red, Treatment) %>%
  unique() %>%
  group_by(row, Treatment) %>%
  mutate(mean_condition = mean(mean_red),
         sd_condition = sd(mean_red)) %>%
  select(row, Treatment, mean_condition, sd_condition) %>%
  unique() %>%
  left_join(sh_positions) %>%
  # mutate() with no arguments leaves the data unchanged
  mutate()
## Joining with `by = join_by(row)`
# Bars: condition means (summary_df); points and error bars: per-well
# mean +/- SD taken from integrated_df2
ggplot(integrated_df2, aes(x = plasmid, y=mean_red, colour= Treatment, fill = Treatment)) +
  geom_bar(data = summary_df, aes(x = plasmid, y = mean_condition), 
           position = position_dodge(width = 0.8),
           width = 0.8,
           stat="identity", alpha = 1) +
  geom_point(alpha = 0.2, position = position_dodge(width = 0.8), colour = "grey50") +
  geom_errorbar(aes(x = plasmid, ymax = (mean_red + sd_red), ymin = (mean_red-sd_red)), 
                position = position_dodge(width = 0.8), width = 0.2, colour = "grey50") +
  ggeasy::easy_rotate_x_labels() +
  ylab("Red fluorescence intensity") +
  xlab("mScarlet Construct #") +
  ggpubr::theme_pubclean() +
  ggsci::scale_fill_npg() +
  ggsci::scale_color_npg() +
  ggeasy::easy_move_legend("right")

# Save the bar plot drawn above (ggsave defaults to the last plot)
ggsave("markdown_images/SHSY5Y mScarlet/quantification of SHSY5Y mScarlet.pdf", height = 8, width = 12, units = "cm")

Let’s compare results between SKNBE2 and SHSY5Y

# Pair up the knockdown (shTDP) mean intensity of each construct in the two
# cell lines, matching constructs by their Order number:
#   sk = mean_dox from the SK-N-BE(2) summary
#   sh = mean_condition of the shTDP rows of the SH-SY5Y summary
sk_sh <- integrated_summary_mscar %>%
  ungroup() %>%
  select(Order, sk = mean_dox) %>%
  unique() %>%
  inner_join(summary_df %>% filter(Treatment == "shTDP") %>% select(Order, sh = mean_condition)) %>%
  filter(Order != 14)  # remove positive control
## Adding missing grouping variables: `row`, `Treatment`
## Joining with `by = join_by(Order)`
# Spearman rank correlation between the two cell lines, computed once
spearman_rho <- cor(sk_sh$sk, sk_sh$sh, method = "spearman")

ggplot(sk_sh, aes(x = sk, y = sh)) +
  scale_x_log10() +
  scale_y_log10() +
  geom_smooth(method = "lm", se = FALSE) +
  geom_point() +
  xlab("Fluorescence in SK-N-BE(2) shTDP")  +
  ylab("Fluorescence in SH-SY5Y shTDP") +
  ggpubr::theme_classic2() +
  # annotate() draws the label once; geom_text() with constant aesthetics
  # would draw one copy per data row on top of each other
  annotate("text", x = 1300, y = 30,
           label = paste0("rho = ", round(spearman_rho, 2))) +
  geom_label_repel(aes(label = paste0(Order)), alpha = 0.8)
## `geom_smooth()` using formula = 'y ~ x'

ggsave("markdown_images/SHSY5Y mScarlet/SHSY5Y versus SKNDZ.pdf", height = 8, width = 8, units = "cm")
## `geom_smooth()` using formula = 'y ~ x'

And now mCherry in SKNBE2

# Display names (and plotting order) for the three mCherry-experiment constructs
good_names_mCherry <- c("Cryptic\nmCherry", "Constitutive\nmCherry", "-ve")
good_names_mCherry_df <- data.frame(good_name = factor(good_names_mCherry, levels = good_names_mCherry),
                                    Construct = c("R3", "mCherry", "untransfected"))

# The per-image SK-N-BE(2) table has to be rebuilt here: by this point
# `objects_df`, `integrated_df` and `integrated_df2` all hold the SH-SY5Y
# experiment, which contains none of the mCherry constructs. Using them would
# leave the point and error-bar layers of the plot below silently empty.
sknbe2_objects_df <- read_csv(paste0(data_dir, "oscar 12_05_2022/cell_profiler_output/incucyte_12_05_2022_lower_thresholdIdentifyPrimaryObjects.csv"),
                              show_col_types = FALSE)

# Same derivation as for the mScarlet quantification: total intensity per
# image, annotated with position/construct/treatment, plus per-well mean and SD
sknbe2_integrated_df <- sknbe2_objects_df %>%
  group_by(FileName_incucyte_12_05_2022) %>%
  summarise(integrated = sum(Intensity_IntegratedIntensity_incucyte_12_05_2022),
            .groups = "drop") %>%
  mutate(well = word(FileName_incucyte_12_05_2022, 2, sep = "_"),
         row = str_sub(well, 1, 1),
         column = as.numeric(str_sub(well, 2, 3)),
         Plate = ifelse(str_detect(FileName_incucyte_12_05_2022, "96"), 1, 2),
         Position = ifelse(column <= 6, paste0(row, "1-6"), paste0(row, "7-12")),
         Treatment = ifelse(column %% 2 == 0, "shTDP", "NT")) %>%
  left_join(positions, by = c("Position", "Plate")) %>%
  group_by(well, Plate) %>%
  mutate(mean_red = mean(integrated),
         sd_red = sd(integrated)) %>%
  ungroup()

# Per-image rows of the three mCherry-experiment constructs
integrated_mCherry <- sknbe2_integrated_df %>%
  left_join(good_names_mCherry_df) %>%
  select(-Order) %>%
  mutate(Order = case_when(Construct == "R3" ~ 1,
                           Construct == "mCherry" ~ 2,
                           Construct == "untransfected" ~ 3)) %>%
  filter(!is.na(good_name))
## Joining with `by = join_by(Construct)`
# Matching per-condition summaries; y_pos is the height of the log2FC label
integrated_summary_mCherry <- integrated_summary %>%
  left_join(good_names_mCherry_df) %>%
  select(-Order) %>%
  mutate(Order = case_when(Construct == "R3" ~ 1,
                           Construct == "mCherry" ~ 2,
                           Construct == "untransfected" ~ 3)) %>%
  filter(!is.na(good_name)) %>%
  mutate(y_pos = 2.4*max(mean_condition+sd_condition))
## Joining with `by = join_by(Construct)`
# Bars: condition means; points and error bars: per-well mean +/- SD;
# text: log2 fold change (shTDP vs NT)
ggplot(integrated_mCherry, aes(x = factor(good_name), y=mean_red, colour= Treatment, fill = Treatment)) +
  scale_y_log10() +
  geom_bar(data = integrated_summary_mCherry, aes(x = factor(good_name), y = mean_condition), 
           position = position_dodge(width = 0.8),
           width = 0.8,
           stat="identity", alpha = 1) +
  geom_point(alpha = 0.2, position = position_dodge(width = 0.8), colour = "grey50") +
  geom_errorbar(aes(x = good_name, ymax = (mean_red + sd_red), ymin = (mean_red-sd_red)), 
                position = position_dodge(width = 0.8), width = 0.2, colour = "grey50") +
  geom_text(data = integrated_summary_mCherry, aes(x = factor(good_name), y = y_pos, 
                                                 label = round(log2FC,1)), colour="grey20", size=3.5) +
  ggeasy::easy_rotate_x_labels() +
  ylab("Red fluorescence intensity") +
  xlab("") +
  ggpubr::theme_pubclean() +
  ggeasy::easy_remove_legend() +
  ggsci::scale_fill_npg() +
  ggsci::scale_color_npg()

ggsave("markdown_images/mCherry/mCherry intensities quantified.pdf", height = 7, width = 8, units="cm")

# Clear the workspace for the next experiment, keeping only data_dir
rm(list=setdiff(ls(), "data_dir"))

Cas9 mCherry

We swapped out the AARS1 cryptic exon for a library of potential CEs, each encoding the same part of Cas9.

Analysis of the Illumina sequencing data

Note that “UPI” stands for “Unique plasmid identifier” i.e. it’s a barcode

This code relies on the output of the python scripts “extract_umis_and_Seqs.py” and “splice_ai_of_cas.py” which are run using a snakemake pipeline.

# Read-count threshold used when filtering plasmids for the plots below
min_reads <- 7

# Read one sample's per-UMI table, tag it with its treatment and attach the
# per-UPI consensus table (joined on the shared column, `upi`)
read_with_consensus <- function(reads_path, consensus_path, treatment_label) {
  per_umi <- read_csv(reads_path)
  consensus <- read_csv(consensus_path)
  per_umi %>%
    mutate(treatment = treatment_label) %>%
    left_join(consensus)
}

# Knockdown (dox-treated) sample
dox <- read_with_consensus(
  paste0(data_dir, "Cas9_mCherry/sequencing_analysis/csv/C9D.csv.gz"),
  paste0(data_dir, "Cas9_mCherry/sequencing_analysis/csv/C9D_consensus.csv"),
  "shTDP"
)
## Rows: 137952 Columns: 8
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (3): upi, umi, CE_seq
## dbl (5): intron1_IR, intron2_IR, both_IR, no_CE, with_CE
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 25300 Columns: 3
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): upi, seq
## dbl (1): n
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Joining with `by = join_by(upi)`
# Untreated sample
nt <- read_with_consensus(
  paste0(data_dir, "Cas9_mCherry/sequencing_analysis/csv/C9N.csv.gz"),
  paste0(data_dir, "Cas9_mCherry/sequencing_analysis/csv/C9N_consensus.csv"),
  "NT"
)
## Rows: 123047 Columns: 8
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (3): upi, umi, CE_seq
## dbl (5): intron1_IR, intron2_IR, both_IR, no_CE, with_CE
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 24378 Columns: 3
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): upi, seq
## dbl (1): n
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Joining with `by = join_by(upi)`
# SpliceAI scores per UPI (columns acc and don)
pred <- read_csv(paste0(data_dir, "Cas9_mCherry/sequencing_analysis/spliceai/spliceai_results.csv"))
## Rows: 2637 Columns: 3
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): upi
## dbl (2): acc, don
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
#pred <- read_csv(paste0(data_dir, "Cas9_mCherry/sequencing_analysis/spliceai/yo.csv"))

# Pool both samples and compute, per treatment and plasmid (UPI), the total
# read count and the fraction of reads that include the cryptic exon.
# Plasmids are then filtered for the ones that can be analysed further.

combined <- bind_rows(dox, nt) %>%
  # one row per (treatment, upi, umi): removes PCR duplicates
  distinct(treatment, upi, umi, .keep_all = TRUE) %>%
  group_by(treatment, upi) %>%
  mutate(
    total = sum(with_CE + no_CE + intron1_IR + intron2_IR + both_IR),
    frac = sum(with_CE) / total
  ) %>%
  select(upi, treatment, frac, total, seq) %>%
  distinct() %>%
  ungroup() %>%
  filter(!is.na(frac))

# One row per plasmid with treatment-suffixed columns. seq2 is the consensus
# sequence: taken from whichever treatment has one, "different" if the two
# disagree, NA if neither has one.
pivoted <- combined %>%
  dplyr::select(upi, treatment, frac, seq, total) %>%
  pivot_wider(names_from = treatment, values_from = c("frac", "seq", "total")) %>%
  ungroup() %>%
  mutate(
    seq2 = case_when(
      is.na(seq_shTDP) & is.na(seq_NT) ~ NA_character_,
      is.na(seq_shTDP) ~ seq_NT,
      is.na(seq_NT) ~ seq_shTDP,
      seq_shTDP != seq_NT ~ "different",
      TRUE ~ seq_NT
    )
  )

# Keep plasmids with an unambiguous 154-character sequence (no "N") and a
# SpliceAI prediction; score = acceptor + donor score
pivoted_just_good <- pivoted %>%
  select(-seq_shTDP, -seq_NT) %>%
  left_join(pred) %>%
  filter(
    !str_detect(seq2, "N"),
    str_length(seq2) == 154,
    !is.na(acc)
  ) %>%
  mutate(score = acc + don)
## Joining with `by = join_by(upi)`
# SpliceAI score versus cryptic-exon inclusion, separately for each
# treatment, using only plasmids with more than `min_reads` reads in that
# treatment. The Spearman correlation is computed once per plot and drawn
# with annotate(), so the label is a single text element rather than one
# copy per plasmid stacked at the same position.

nt_covered <- pivoted_just_good %>%
  filter(total_NT > min_reads)
rho_nt <- cor(nt_covered$score, nt_covered$frac_NT, method = "spearman")

plot_nt_spliceai <- ggplot(nt_covered, aes(x = 100*frac_NT, y = score, colour="grey20")) +
  geom_hex(bins=20) +
  scale_fill_gradient2(low="grey100", high="orange", mid = "grey100") +
  theme_classic() +
  scale_color_identity() +
  annotate("text", x = 90, y = 0.1, colour = "grey20",
           label = paste0("rho = ", round(rho_nt, 2))) +
  xlab("% Inclusion") +
  ylab("Combined SpliceAI Score")

dox_covered <- pivoted_just_good %>%
  filter(total_shTDP > min_reads)
rho_dox <- cor(dox_covered$score, dox_covered$frac_shTDP, method = "spearman")

plot_dox_spliceai <- ggplot(dox_covered, aes(x = 100*frac_shTDP, y = score, colour="grey20")) +
  geom_hex(bins=20) +
  scale_fill_gradient2(low="grey100", high="orange", mid = "grey100") +
  theme_classic() +
  scale_color_identity() +
  annotate("text", x = 90, y = 0.1, colour = "grey20",
           label = paste0("rho = ", round(rho_dox, 2))) +
  xlab("% Inclusion") +
  ylab("Combined SpliceAI Score")

#ggsave("markdown_images/mCherry/splice_correlations.pdf", height = 7, width = 17, units="cm")


# Plasmids with enough reads in both treatments
pivoted_high_both <- pivoted_just_good %>%
  filter(total_shTDP > min_reads & total_NT > min_reads)

# Inclusion (PSI) with versus without knockdown
plot_psi_comparison <- ggplot(pivoted_high_both, aes(x = 100*frac_NT, y = 100*frac_shTDP, colour = "grey20")) +
  geom_hex(bins=20) +
  scale_fill_gradient2(low="grey100", high="orange", mid = "grey100") +
  theme_classic() +
  scale_color_identity() +
  ylab("CE PSI shTDP") +
  xlab("CE PSI NT")

# Three panels on a 2 x 2 grid; the unused top-right cell is an empty spacer
# (previously plot_psi_comparison was drawn twice to fill it)
(plot_psi_comparison | patchwork::plot_spacer()) / (plot_nt_spliceai | plot_dox_spliceai)

ggsave("markdown_images/mCherry/all_three_cas9_plots.pdf", height=14, width=18, units="cm")

Analysis of Cas9 mCherry fluorescence

# Per-object CellProfiler output for the Cas9 mCherry plate
objects_df <- read_csv(paste0(data_dir, "Cas9_mCherry/cell_profiler_output/cas9_design2_mChIdentifyPrimaryObjects.csv"))
## Rows: 42388 Columns: 23
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (2): FileName_incucyte_12_05_2022, PathName_incucyte_12_05_2022
## dbl (21): ImageNumber, ObjectNumber, Intensity_IntegratedIntensityEdge_incuc...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Plasmid in plate rows A-D, in that order
plasmids = c("+ve", "A11", "Cas9-mCh", "-ve")

# The factor levels set the plotting order, which differs from the row order
position_df <- data.frame(row = c("A", 'B', 'C', 'D'),
                          plasmid = factor(plasmids, levels = c("+ve", "-ve", "Cas9-mCh", "A11"))) 

# Summarise to find the total integrated intensity of each image
# Then label well/row/column and treatment (even columns = shTDP), attach the
# plasmid, and add the mean and SD over all images of each plasmid x treatment
integrated_df <- objects_df %>%
  group_by(FileName_incucyte_12_05_2022) %>%
  mutate(integrated = sum(Intensity_IntegratedIntensity_incucyte_12_05_2022)) %>%
  select(FileName_incucyte_12_05_2022, integrated) %>%
  unique() %>%
  mutate(well = word(FileName_incucyte_12_05_2022, 2, sep="_")) %>%
  mutate(row = str_sub(well, 1, 1),
         column = as.numeric(str_sub(well, 2, 3))) %>%
  mutate(Treatment = ifelse(column %% 2 == 0, "shTDP", "NT")) %>%
  left_join(position_df) %>%
  group_by(plasmid, Treatment) %>%
  mutate(average_intensity = mean(integrated),
         sd_intensity = sd(integrated))
## Joining with `by = join_by(row)`
# One row per plasmid x treatment; the grouping columns are kept
# automatically by select() (see the message below)
summary_df <- integrated_df %>%
  select(average_intensity, sd_intensity) %>%
  unique()
## Adding missing grouping variables: `plasmid`, `Treatment`
# Wide table (one row per plasmid) with the shTDP-vs-NT log2 fold change
log2fc_df <- summary_df %>%
  ungroup() %>%
  group_by(plasmid) %>%
  pivot_wider(names_from = Treatment, values_from = c("average_intensity", "sd_intensity")) %>%
  mutate(log2fc = log2(average_intensity_shTDP/average_intensity_NT))

# Label height (ymax) and log2fc per plasmid; only referenced by the
# commented-out geom_text() layer of the plot that follows
log2fc_summary_df <- summary_df %>%
  left_join(log2fc_df %>% select(plasmid, log2fc)) %>%
  ungroup() %>%
  group_by(plasmid) %>%
  mutate(ymax = max(average_intensity+sd_intensity/sqrt(3))) %>%
  ungroup() %>%
  select(plasmid, ymax, log2fc) %>%
  unique()
## Joining with `by = join_by(plasmid)`
# Mean integrated intensity per plasmid and treatment (A11 is not shown).
# The error bars are dodged with `group = Treatment`: `fill` is not an
# aesthetic of geom_errorbar() and only triggered an "unknown aesthetics"
# warning while being used for grouping.
# NOTE(review): the SEM divides by sqrt(3) while the subtitle states N=4 -
# confirm which replicate number is correct.
ggplot(summary_df %>% filter(plasmid != "A11"), 
       aes(x = plasmid, y = average_intensity)) +
  geom_bar(stat="identity", position="dodge",
           aes(fill = Treatment)) +
  geom_errorbar(aes(ymin = average_intensity-sd_intensity/sqrt(3),
                    ymax = average_intensity+sd_intensity/sqrt(3),
                    group = Treatment),
                position="dodge") +
  ylab("Integrated Intensity (arbitrary units)") +
  xlab("") +
  ggtitle("Cas9 Design 2",
          "N=4, error bar=SEM, log2fc shown") +
  theme_classic() +
  ggeasy::easy_remove_legend_title() +
  ggsci::scale_fill_npg() #+

  # geom_text(data = log2fc_summary_df %>%
  #             filter(plasmid!="A11") , aes(x = plasmid, y = ymax+3, label = signif(log2fc, 2)))

ggsave("markdown_images/mCherry/Cas9_mCherry_quantification.pdf", height = 8, width = 10, units= "cm")

# Clear the workspace for the next experiment, keeping only data_dir
rm(list=setdiff(ls(), "data_dir"))

Raver rescue

To help demonstrate that the expression of mScarlet above is specifically due to TDP-43 knockdown, we tried a similar experiment to before, except that either a functional or a non-functional (2FL mutant) TDP-43/Raver1 fusion protein, which is known to rescue TDP-43 loss of splicing function, was co-transfected.

# Layout and normalisation settings for the per-row montages
mini_spacing <- 5             # gap between images of the same well (pixels)
big_spacing <- 15             # extra gap between neighbouring wells (pixels)
dim_image <- 200              # side length each image is resized to
norm_quantile <- 0.995        # quantile of sampled pixels used for scaling
background_quantile <- 0.02   # quantile of row-A pixels used as background

# Find all relevant image files
image_files <- Sys.glob(paste0(data_dir, "raver rescue/orange/*.png"))

# Assign the layout: within each plate row, columns 1-6 form the upper line of
# wells and columns 7-12 the lower line; each well is a 2 x 2 block of its
# four images (1-2 on top, 3-4 below)
image_df <- data.frame(filename = word(image_files, -1, sep="/"),
                       full_dir = image_files) %>%
  mutate(well = word(filename, 2, sep="_")) %>%
  mutate(row = str_sub(well,1,1)) %>%
  mutate(column = as.numeric(str_sub(well, 2, -1)),
         image_number = as.numeric(word(filename, 3, sep="_"))) %>%
  mutate(big_row = ifelse(column <= 6, 1, 2),
         small_row = ifelse(image_number > 2, 2, 1),
         big_column = ifelse(column <= 6, column, column - 6),
         small_column = ifelse(image_number > 2, image_number - 2, image_number)) %>%
  mutate(n_mini_spaces_x = big_column - 1 + small_column - 1,
         n_big_spaces_x = big_column - 1,
         n_mini_spaces_y = big_row - 1 + small_row - 1,
         n_big_spaces_y = big_row - 1,
         n_previous_images_x = 2*(big_column-1) + small_column - 1,
         n_previous_images_y = 2*(big_row-1) + small_row - 1) %>%
  mutate(start_x = 1+n_big_spaces_x*big_spacing + n_mini_spaces_x*mini_spacing + n_previous_images_x*dim_image) %>%
  mutate(start_y = 1+n_big_spaces_y*big_spacing + n_mini_spaces_y*mini_spacing + n_previous_images_y*dim_image)

# Draw a random sample of pixel values from each image, in file order
sample_pixels <- function(paths, n_per_image = 100) {
  unlist(lapply(paths, function(path) {
    sample(as.vector(readImage(path)), n_per_image)
  }))
}

# Find common normalisation factor
all_vals <- sample_pixels(image_df$full_dir)
normalisation <- quantile(all_vals, norm_quantile)

# Find background, using only Row A as these were untransfected
background_df <- image_df %>% filter(row == "A")
all_vals2 <- sample_pixels(background_df$full_dir)

# Use the row-A sample here (this previously read `all_vals`, i.e. all wells)
background <- quantile(all_vals2, background_quantile)

# Write one montage per plate row
for (this_row in unique(image_df$row)) {
  row_df <- image_df %>%
    filter(row == this_row)
  
  height <- max(row_df$start_y) + dim_image
  width <- max(row_df$start_x) + dim_image
  
  full_image <- matrix(0, nrow = height, ncol = width)
  
  for (i in seq_len(nrow(row_df))) {
    this_image <- readImage(row_df$full_dir[i])
    smaller <- as.array(EBImage::resize(this_image, w = dim_image, h = dim_image))
    
    start_x <- row_df$start_x[i]
    start_y <- row_df$start_y[i]
    
    full_image[start_y:(start_y + dim_image - 1), start_x:(start_x + dim_image - 1)] <- smaller
  }
  
  # Background-subtract, scale by the common factor and invert
  img <- Image(1 - t((full_image - background) / normalisation))
  
  writeImage(img, paste0("markdown_images/individual_raver_rescue/Row", this_row, ".jpeg"), quality = 90)
}

Now plot the CellProfiler quantification of these images

calculate_errors_y_over_x <- function(x_mean, x_sem, y_mean, y_sem, quant, n = 10000, log2fc = FALSE){
  # Monte Carlo estimate of a quantile of the ratio y / x.
  #
  # Draws `n` values for x and for y from normal distributions with the given
  # means and standard errors, forms the ratio y / x (or log2 of it when
  # `log2fc` is TRUE) and returns the requested quantile(s) of the simulated
  # ratios.
  #
  # Arguments:
  #   x_mean, x_sem  mean and spread (passed to rnorm() as sd) of the denominator
  #   y_mean, y_sem  mean and spread of the numerator
  #   quant          probability (or probabilities) passed to quantile()
  #   n              number of simulated draws
  #   log2fc         if TRUE, work with log2(y / x); draws whose ratio is
  #                  negative or undefined have no log2 and are left out
  #
  # Returns the (named) output of quantile().
  x_vals <- rnorm(n = n, mean = x_mean, sd = x_sem)
  y_vals <- rnorm(n = n, mean = y_mean, sd = y_sem)
  
  ratios <- y_vals / x_vals
  
  if (log2fc) {
    # Drop unusable draws before taking the log, so log2() never sees a
    # negative value (same draws are kept as before, without NaN warnings)
    usable <- !is.na(ratios) & ratios >= 0
    ratios <- log2(ratios[usable])
  }
  
  quantile(ratios, quant)
}

# Per-object CellProfiler output for the Raver rescue plate
objects_df <- read_csv(paste0(data_dir, "raver rescue/cell_profiler_output/incuyte rescue analysisIdentifyPrimaryObjects.csv"))
## Rows: 169619 Columns: 23
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (2): FileName_incucyte_12_05_2022, PathName_incucyte_12_05_2022
## dbl (21): ImageNumber, ObjectNumber, Intensity_IntegratedIntensityEdge_incuc...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Plasmid in plate rows B-H, in that order; TDP_REG is FALSE only for the
# mScarlet control
plasmids <- c("mScarlet", "A10", "A11", "B5", "B11", "B12", "C2")
plasmid_df <- data.frame(
  row = LETTERS[2:8],
  plasmid = factor(plasmids, levels = plasmids)
) %>%
  mutate(TDP_REG = plasmid != "mScarlet")

positions <- read_csv("small_data_files/Plate positions for 12_05_2022 incucyte.csv")
## Rows: 20 Columns: 5
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (3): Construct, Position, Type
## dbl (2): Plate, Order
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
integrated_df <- objects_df %>%
  group_by(FileName_incucyte_12_05_2022) %>%
  mutate(integrated = sum(Intensity_IntegratedIntensity_incucyte_12_05_2022)) %>%
  select(FileName_incucyte_12_05_2022, integrated) %>%
  unique() %>%
  mutate(well = word(FileName_incucyte_12_05_2022, 2, sep="_")) %>%
  mutate(row = str_sub(well, 1, 1),
         column = as.numeric(str_sub(well, 2, 3)))  %>%
  mutate(dox = ifelse(column %% 2 == 0, "shTDP", "NT"),
         construct = ifelse(column <= 6, "Raver", "2FL")) %>%
  ungroup() %>%
  group_by(dox, construct, row, column) %>%
  mutate(average_intensity_well = mean(integrated)) %>%
  ungroup() %>%
  group_by(dox, construct, row) %>%
  select(average_intensity_well) %>%
  unique() %>%
  mutate(average_intensity = mean(average_intensity_well),
         sd_intensity = sd(average_intensity_well))
## Adding missing grouping variables: `dox`, `construct`, `row`
summary_df <- integrated_df %>%
  select(average_intensity, sd_intensity) %>%
  unique() %>%
  filter(row != "A")
## Adding missing grouping variables: `dox`, `construct`, `row`
log2fc_df <- summary_df %>%
  ungroup() %>%
  group_by(dox) %>%
  pivot_wider(names_from = construct, values_from = c("average_intensity", "sd_intensity")) %>%
  mutate(log2fc = log2(average_intensity_2FL/average_intensity_Raver)) %>%
  left_join(plasmid_df) %>%
  left_join(positions %>% dplyr::rename(plasmid = Construct)) %>%
  mutate(good_name = factor(ifelse(plasmid == "mScarlet", "+ve", Order),
                            levels = c("+ve", "6", "7", "9",
                                       "10", "11", "12")))
## Joining with `by = join_by(row)`
## Joining with `by = join_by(plasmid)`
# Lower (quant = 0.05) bound for the 2FL/Raver ratio, one value per row.
# Arguments are matched positionally to calculate_errors_y_over_x() as
# (x_mean, x_sem, y_mean, y_sem, quant): x = Raver, y = 2FL.
# NOTE(review): the x_sem / y_sem slots receive sqrt(sd). A standard error is
# normally sd / sqrt(number of replicates) - confirm this is intended.
log2fc_df$log2fc_5pc <- mapply(calculate_errors_y_over_x,
                               log2fc_df$average_intensity_Raver,
                               sqrt(log2fc_df$sd_intensity_Raver),
                               log2fc_df$average_intensity_2FL,
                               sqrt(log2fc_df$sd_intensity_2FL),
                               0.05,
                               log2fc = TRUE)
## Warning in (function (x_mean, x_sem, y_mean, y_sem, quant, n = 10000, log2fc =
## F) : NaNs produced

## Warning in (function (x_mean, x_sem, y_mean, y_sem, quant, n = 10000, log2fc =
## F) : NaNs produced

## Warning in (function (x_mean, x_sem, y_mean, y_sem, quant, n = 10000, log2fc =
## F) : NaNs produced
# Upper (quant = 0.95) bound for the 2FL/Raver ratio, one value per row.
# Arguments are matched positionally to calculate_errors_y_over_x() as
# (x_mean, x_sem, y_mean, y_sem, quant): x = Raver, y = 2FL.
# NOTE(review): the x_sem / y_sem slots receive sqrt(sd). A standard error is
# normally sd / sqrt(number of replicates) - confirm this is intended.
log2fc_df$log2fc_95pc <- mapply(calculate_errors_y_over_x,
                                log2fc_df$average_intensity_Raver,
                                sqrt(log2fc_df$sd_intensity_Raver),
                                log2fc_df$average_intensity_2FL,
                                sqrt(log2fc_df$sd_intensity_2FL),
                                0.95,
                                log2fc = TRUE)
## Warning in (function (x_mean, x_sem, y_mean, y_sem, quant, n = 10000, log2fc =
## F) : NaNs produced

## Warning in (function (x_mean, x_sem, y_mean, y_sem, quant, n = 10000, log2fc =
## F) : NaNs produced

## Warning in (function (x_mean, x_sem, y_mean, y_sem, quant, n = 10000, log2fc =
## F) : NaNs produced
# Bar chart for the "shTDP" rows only: percentage change in intensity
# (2FL relative to Raver) per construct, with error bars spanning the
# 5% and 95% bounds computed above. The legend is removed.
ggplot(filter(log2fc_df, dox == "shTDP"),
       aes(x = good_name, y = 100*(2^log2fc)-100, fill = TDP_REG)) +
  geom_col() +
  geom_errorbar(aes(ymin = 100*(2^log2fc_5pc)-100,
                    ymax = 100*(2^log2fc_95pc)-100,
                    width=0.5)) +
  scale_fill_viridis_d() +
  labs(title = "Increase in intensity when using\nmutant TDP/Raver1 instead of WT",
       x = "mScarlet construct",
       y = "% increase with 2FL") +
  ggpubr::theme_pubclean() +
  ggeasy::easy_remove_legend()

# Write the plot to disk as a 10 x 7 cm PDF
ggsave("markdown_images/mScar/raver_rescue_quantification.pdf",
       width = 10, height = 7, units="cm")

# Remove every object in the current environment except `data_dir`.
# This also removes any functions defined there, so everything used
# below must be (re)created after this point.
rm(list=setdiff(ls(), "data_dir"))

Nanopore analysis of splicing

We did lots of Nanopore. We define a term: “productively spliced”. This refers to the percentage of transcripts whose splicing produces a mature mRNA that contains the full, uninterrupted CDS for the functional protein of interest and is not predicted to be NMD-sensitive.

Nanopore analysis here relies primarily on CSV files generated by the python function “extract_splice_junctions_from_bam.py”

Additionally, some plots rely on “pileups” which were generated using a custom pysam-based script, “perform_pileups.py”.

Nanopore of mScarlet

# Read in splice junction CSVs
all_junction_counts <- Sys.glob(paste0(data_dir, 
                                "Nanopore from first mScarlet mCherry 96 plates/extracted splice junctions 96 well/junction_counts/*csv.gz"))

# Read every file into a pre-allocated list and bind once at the end, instead
# of growing a tibble inside the loop. `all_csvs` is always defined afterwards,
# even when no file matches or every file is empty.
csv_list <- vector("list", length(all_junction_counts))

for(i in seq_along(all_junction_counts)){
  f <- all_junction_counts[i]
  print(f)
  
  # File names have the form "<forward barcode>_<reverse barcode>.csv.gz"
  file_name <- word(f, sep="/", -1, -1)
  bc_f <- word(file_name, sep="_", 1, 1)
  bc_r <- word(word(file_name, sep="_", 2, 2), sep="\\.", 1)
  
  this_csv <- read_csv(f, col_types = cols(.default = "?", mapping_quality = "d")) %>%
    mutate(bc_f = bc_f,
           bc_r = bc_r) 
  
  # Files without rows are skipped: their slot stays NULL, which
  # bind_rows() ignores.
  if(nrow(this_csv) > 0){
    csv_list[[i]] <- this_csv
  }
}

all_csvs <- bind_rows(csv_list)
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Nanopore from first mScarlet mCherry 96 plates/extracted splice junctions 96 well/junction_counts/f1_r1.csv.gz"
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Nanopore from first mScarlet mCherry 96 plates/extracted splice junctions 96 well/junction_counts/f1_r10.csv.gz"
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Nanopore from first mScarlet mCherry 96 plates/extracted splice junctions 96 well/junction_counts/f1_r11.csv.gz"
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Nanopore from first mScarlet mCherry 96 plates/extracted splice junctions 96 well/junction_counts/f1_r12.csv.gz"
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Nanopore from first mScarlet mCherry 96 plates/extracted splice junctions 96 well/junction_counts/f1_r2.csv.gz"
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Nanopore from first mScarlet mCherry 96 plates/extracted splice junctions 96 well/junction_counts/f1_r3.csv.gz"
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Nanopore from first mScarlet mCherry 96 plates/extracted splice junctions 96 well/junction_counts/f1_r4.csv.gz"
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Nanopore from first mScarlet mCherry 96 plates/extracted splice junctions 96 well/junction_counts/f1_r5.csv.gz"
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Nanopore from first mScarlet mCherry 96 plates/extracted splice junctions 96 well/junction_counts/f1_r6.csv.gz"
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Nanopore from first mScarlet mCherry 96 plates/extracted splice junctions 96 well/junction_counts/f1_r7.csv.gz"
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Nanopore from first mScarlet mCherry 96 plates/extracted splice junctions 96 well/junction_counts/f1_r8.csv.gz"
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Nanopore from first mScarlet mCherry 96 plates/extracted splice junctions 96 well/junction_counts/f1_r9.csv.gz"
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Nanopore from first mScarlet mCherry 96 plates/extracted splice junctions 96 well/junction_counts/f2_r1.csv.gz"
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Nanopore from first mScarlet mCherry 96 plates/extracted splice junctions 96 well/junction_counts/f2_r10.csv.gz"
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Nanopore from first mScarlet mCherry 96 plates/extracted splice junctions 96 well/junction_counts/f2_r11.csv.gz"
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Nanopore from first mScarlet mCherry 96 plates/extracted splice junctions 96 well/junction_counts/f2_r12.csv.gz"
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Nanopore from first mScarlet mCherry 96 plates/extracted splice junctions 96 well/junction_counts/f2_r2.csv.gz"
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Nanopore from first mScarlet mCherry 96 plates/extracted splice junctions 96 well/junction_counts/f2_r3.csv.gz"
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Nanopore from first mScarlet mCherry 96 plates/extracted splice junctions 96 well/junction_counts/f2_r4.csv.gz"
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Nanopore from first mScarlet mCherry 96 plates/extracted splice junctions 96 well/junction_counts/f2_r5.csv.gz"
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Nanopore from first mScarlet mCherry 96 plates/extracted splice junctions 96 well/junction_counts/f2_r6.csv.gz"
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Nanopore from first mScarlet mCherry 96 plates/extracted splice junctions 96 well/junction_counts/f2_r7.csv.gz"
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Nanopore from first mScarlet mCherry 96 plates/extracted splice junctions 96 well/junction_counts/f2_r8.csv.gz"
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Nanopore from first mScarlet mCherry 96 plates/extracted splice junctions 96 well/junction_counts/f2_r9.csv.gz"
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Nanopore from first mScarlet mCherry 96 plates/extracted splice junctions 96 well/junction_counts/f3_r1.csv.gz"
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Nanopore from first mScarlet mCherry 96 plates/extracted splice junctions 96 well/junction_counts/f3_r10.csv.gz"
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Nanopore from first mScarlet mCherry 96 plates/extracted splice junctions 96 well/junction_counts/f3_r11.csv.gz"
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Nanopore from first mScarlet mCherry 96 plates/extracted splice junctions 96 well/junction_counts/f3_r12.csv.gz"
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Nanopore from first mScarlet mCherry 96 plates/extracted splice junctions 96 well/junction_counts/f3_r2.csv.gz"
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Nanopore from first mScarlet mCherry 96 plates/extracted splice junctions 96 well/junction_counts/f3_r3.csv.gz"
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Nanopore from first mScarlet mCherry 96 plates/extracted splice junctions 96 well/junction_counts/f3_r4.csv.gz"
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Nanopore from first mScarlet mCherry 96 plates/extracted splice junctions 96 well/junction_counts/f3_r5.csv.gz"
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Nanopore from first mScarlet mCherry 96 plates/extracted splice junctions 96 well/junction_counts/f3_r6.csv.gz"
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Nanopore from first mScarlet mCherry 96 plates/extracted splice junctions 96 well/junction_counts/f3_r7.csv.gz"
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Nanopore from first mScarlet mCherry 96 plates/extracted splice junctions 96 well/junction_counts/f3_r8.csv.gz"
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Nanopore from first mScarlet mCherry 96 plates/extracted splice junctions 96 well/junction_counts/f3_r9.csv.gz"
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Nanopore from first mScarlet mCherry 96 plates/extracted splice junctions 96 well/junction_counts/f4_r1.csv.gz"
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Nanopore from first mScarlet mCherry 96 plates/extracted splice junctions 96 well/junction_counts/f4_r10.csv.gz"
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Nanopore from first mScarlet mCherry 96 plates/extracted splice junctions 96 well/junction_counts/f4_r11.csv.gz"
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Nanopore from first mScarlet mCherry 96 plates/extracted splice junctions 96 well/junction_counts/f4_r12.csv.gz"
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Nanopore from first mScarlet mCherry 96 plates/extracted splice junctions 96 well/junction_counts/f4_r2.csv.gz"
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Nanopore from first mScarlet mCherry 96 plates/extracted splice junctions 96 well/junction_counts/f4_r3.csv.gz"
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Nanopore from first mScarlet mCherry 96 plates/extracted splice junctions 96 well/junction_counts/f4_r4.csv.gz"
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Nanopore from first mScarlet mCherry 96 plates/extracted splice junctions 96 well/junction_counts/f4_r5.csv.gz"
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Nanopore from first mScarlet mCherry 96 plates/extracted splice junctions 96 well/junction_counts/f4_r6.csv.gz"
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Nanopore from first mScarlet mCherry 96 plates/extracted splice junctions 96 well/junction_counts/f4_r7.csv.gz"
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Nanopore from first mScarlet mCherry 96 plates/extracted splice junctions 96 well/junction_counts/f4_r8.csv.gz"
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Nanopore from first mScarlet mCherry 96 plates/extracted splice junctions 96 well/junction_counts/f4_r9.csv.gz"
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Nanopore from first mScarlet mCherry 96 plates/extracted splice junctions 96 well/junction_counts/f5_r1.csv.gz"
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Nanopore from first mScarlet mCherry 96 plates/extracted splice junctions 96 well/junction_counts/f5_r10.csv.gz"
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Nanopore from first mScarlet mCherry 96 plates/extracted splice junctions 96 well/junction_counts/f5_r11.csv.gz"
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Nanopore from first mScarlet mCherry 96 plates/extracted splice junctions 96 well/junction_counts/f5_r12.csv.gz"
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Nanopore from first mScarlet mCherry 96 plates/extracted splice junctions 96 well/junction_counts/f5_r2.csv.gz"
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Nanopore from first mScarlet mCherry 96 plates/extracted splice junctions 96 well/junction_counts/f5_r3.csv.gz"
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Nanopore from first mScarlet mCherry 96 plates/extracted splice junctions 96 well/junction_counts/f5_r4.csv.gz"
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Nanopore from first mScarlet mCherry 96 plates/extracted splice junctions 96 well/junction_counts/f5_r5.csv.gz"
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Nanopore from first mScarlet mCherry 96 plates/extracted splice junctions 96 well/junction_counts/f5_r6.csv.gz"
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Nanopore from first mScarlet mCherry 96 plates/extracted splice junctions 96 well/junction_counts/f5_r7.csv.gz"
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Nanopore from first mScarlet mCherry 96 plates/extracted splice junctions 96 well/junction_counts/f5_r8.csv.gz"
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Nanopore from first mScarlet mCherry 96 plates/extracted splice junctions 96 well/junction_counts/f5_r9.csv.gz"
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Nanopore from first mScarlet mCherry 96 plates/extracted splice junctions 96 well/junction_counts/f6_r1.csv.gz"
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Nanopore from first mScarlet mCherry 96 plates/extracted splice junctions 96 well/junction_counts/f6_r10.csv.gz"
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Nanopore from first mScarlet mCherry 96 plates/extracted splice junctions 96 well/junction_counts/f6_r11.csv.gz"
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Nanopore from first mScarlet mCherry 96 plates/extracted splice junctions 96 well/junction_counts/f6_r12.csv.gz"
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Nanopore from first mScarlet mCherry 96 plates/extracted splice junctions 96 well/junction_counts/f6_r2.csv.gz"
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Nanopore from first mScarlet mCherry 96 plates/extracted splice junctions 96 well/junction_counts/f6_r3.csv.gz"
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Nanopore from first mScarlet mCherry 96 plates/extracted splice junctions 96 well/junction_counts/f6_r4.csv.gz"
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Nanopore from first mScarlet mCherry 96 plates/extracted splice junctions 96 well/junction_counts/f6_r5.csv.gz"
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Nanopore from first mScarlet mCherry 96 plates/extracted splice junctions 96 well/junction_counts/f6_r6.csv.gz"
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Nanopore from first mScarlet mCherry 96 plates/extracted splice junctions 96 well/junction_counts/f6_r7.csv.gz"
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Nanopore from first mScarlet mCherry 96 plates/extracted splice junctions 96 well/junction_counts/f6_r8.csv.gz"
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Nanopore from first mScarlet mCherry 96 plates/extracted splice junctions 96 well/junction_counts/f6_r9.csv.gz"
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Nanopore from first mScarlet mCherry 96 plates/extracted splice junctions 96 well/junction_counts/f7_r1.csv.gz"
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Nanopore from first mScarlet mCherry 96 plates/extracted splice junctions 96 well/junction_counts/f7_r10.csv.gz"
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Nanopore from first mScarlet mCherry 96 plates/extracted splice junctions 96 well/junction_counts/f7_r11.csv.gz"
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Nanopore from first mScarlet mCherry 96 plates/extracted splice junctions 96 well/junction_counts/f7_r12.csv.gz"
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Nanopore from first mScarlet mCherry 96 plates/extracted splice junctions 96 well/junction_counts/f7_r2.csv.gz"
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Nanopore from first mScarlet mCherry 96 plates/extracted splice junctions 96 well/junction_counts/f7_r3.csv.gz"
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Nanopore from first mScarlet mCherry 96 plates/extracted splice junctions 96 well/junction_counts/f7_r4.csv.gz"
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Nanopore from first mScarlet mCherry 96 plates/extracted splice junctions 96 well/junction_counts/f7_r5.csv.gz"
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Nanopore from first mScarlet mCherry 96 plates/extracted splice junctions 96 well/junction_counts/f7_r6.csv.gz"
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Nanopore from first mScarlet mCherry 96 plates/extracted splice junctions 96 well/junction_counts/f7_r7.csv.gz"
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Nanopore from first mScarlet mCherry 96 plates/extracted splice junctions 96 well/junction_counts/f7_r8.csv.gz"
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Nanopore from first mScarlet mCherry 96 plates/extracted splice junctions 96 well/junction_counts/f7_r9.csv.gz"
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Nanopore from first mScarlet mCherry 96 plates/extracted splice junctions 96 well/junction_counts/f8_r1.csv.gz"
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Nanopore from first mScarlet mCherry 96 plates/extracted splice junctions 96 well/junction_counts/f8_r10.csv.gz"
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Nanopore from first mScarlet mCherry 96 plates/extracted splice junctions 96 well/junction_counts/f8_r11.csv.gz"
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Nanopore from first mScarlet mCherry 96 plates/extracted splice junctions 96 well/junction_counts/f8_r12.csv.gz"
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Nanopore from first mScarlet mCherry 96 plates/extracted splice junctions 96 well/junction_counts/f8_r2.csv.gz"
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Nanopore from first mScarlet mCherry 96 plates/extracted splice junctions 96 well/junction_counts/f8_r3.csv.gz"
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Nanopore from first mScarlet mCherry 96 plates/extracted splice junctions 96 well/junction_counts/f8_r4.csv.gz"
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Nanopore from first mScarlet mCherry 96 plates/extracted splice junctions 96 well/junction_counts/f8_r5.csv.gz"
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Nanopore from first mScarlet mCherry 96 plates/extracted splice junctions 96 well/junction_counts/f8_r6.csv.gz"
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Nanopore from first mScarlet mCherry 96 plates/extracted splice junctions 96 well/junction_counts/f8_r7.csv.gz"
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Nanopore from first mScarlet mCherry 96 plates/extracted splice junctions 96 well/junction_counts/f8_r8.csv.gz"
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Nanopore from first mScarlet mCherry 96 plates/extracted splice junctions 96 well/junction_counts/f8_r9.csv.gz"
# Filter just for good alignments:
#  - drop records flagged "not primary" or "supplementary" (a missing flag
#    string is treated as empty, so those records are kept)
#  - require mapping quality above 50
#  - drop the d1s/d2s/d3s references
#  - drop files whose forward or reverse barcode label contains "-1"
# The numeric barcode is read from characters 2-3 of each barcode label.
primary <- all_csvs %>%
  mutate(flag_string = ifelse(is.na(flag_string), "", flag_string),
         reference = factor(reference,
                            levels = c("d1s", "d2s", "d3s", "A10", 
                                       "A11", "B1", "B2", "B4", "B5",
                                       "B6", "B11", "B12", "C2", "C3",
                                       "E1", "E4"))) %>%
  filter(!str_detect(flag_string, "not primary|supplementary"),
         mapping_quality > 50,
         !(reference %in% c("d1s", "d2s", "d3s")),
         !str_detect(bc_f, "-1"),
         !str_detect(bc_r, "-1")) %>%
  mutate(barcode1 = as.numeric(str_sub(bc_f, 2, 3)),
         barcode2 = as.numeric(str_sub(bc_r, 2, 3)))

# Reads with no recorded junctions get an empty junction string
primary$junctions <- replace(primary$junctions, is.na(primary$junctions), "")

# Filter for those that have the expected barcode
# Plate layout table. `row` is the row index within the full file (it is
# assigned before the Plate filter); only plate 1 is kept.
# seq_len(n()) is used instead of 1:n() so an empty file gives zero rows
# rather than an error.
positions <- read_csv("small_data_files/Plate positions for 12_05_2022 incucyte.csv") %>%
  mutate(row = seq_len(n())) %>%
  filter(Plate == 1)
## Rows: 20 Columns: 5
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (3): Construct, Position, Type
## dbl (2): Plate, Order
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Lookup table from forward barcode number (1-8) to plate row letter (A-H)
forward_bc_df <- data.frame(barcode1 = 1:8, row = LETTERS[1:8])

# Assign each read group to a well and look up the construct expected there.
# Forward barcode -> plate row letter, reverse barcode -> plate column.
# `Position` is the row letter plus the plate half ("1-6" or "7-12"), which is
# the key into `positions`. Construct names are shortened
# ("Design " -> "d", " stronger" -> "s") and d1s/d2s/d3s are dropped.
df2 <- primary %>%
  ungroup() %>%
  filter(barcode1 > 0, barcode2 > 0) %>%
  left_join(forward_bc_df, by = "barcode1") %>%
  mutate(well = paste0(row, barcode2),
         Position = paste0(str_sub(well, 1, 1),
                           ifelse(barcode2 <= 6, "1-6", "7-12"))) %>%
  left_join(positions, by = "Position") %>%  
  mutate(Construct = Construct %>%
           str_replace("Design ", "d") %>%
           str_replace(" stronger", "s")) %>%
  filter(!(Construct %in% c("d1s", "d2s", "d3s"))) %>%
  ungroup() 

# Keep only reads whose aligned reference equals the construct expected for
# their well, and report the percentage of reads (weighted by
# number_of_reads) that survive this filter.
n_before_filter <- sum(df2[["number_of_reads"]])

df2 <- filter(df2, Construct == reference)

n_after_filter <- sum(df2[["number_of_reads"]])

accuracy <- 100*(n_after_filter/n_before_filter)
print(paste("Accuracy =", accuracy, "%"))
## [1] "Accuracy = 99.3772200152704 %"
######## Analyse splicing #########

# Header-less, tab-separated GTF with nine columns, named X1..X9 explicitly.
# `col_names` is documented to take TRUE, FALSE or a character vector, so the
# names are passed as a character vector rather than as the integers 1:9.
gtf <- read_tsv(paste0(data_dir, "Nanopore from first mScarlet mCherry 96 plates/combined_gtf.gtf"), 
                col_names = paste0("X", 1:9))
## Rows: 63 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (7): X1, X2, X3, X6, X7, X8, X9
## dbl (2): X4, X5
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Calculate what the productive splice junctions should be from the GTF.
# Per construct (GTF column X1), features tagged "productive" or "downstream"
# (column X9) are ordered by start (X4). Each junction runs from a feature's
# end (X5) minus 1 to the next feature's start minus 1. Only junctions that
# begin at a "productive" feature are kept. They are written as "start-end"
# and joined with ";"; the last one is also stored on its own.
# The result has one row per construct and stays grouped by Construct.
gtf_productive <- gtf %>%
  dplyr::rename(Construct = X1) %>%
  group_by(Construct) %>%
  filter(X9 %in% c("productive", "downstream")) %>%
  arrange(X4) %>%
  mutate(junc_start = X5-1,
         junc_end = lead(X4)-1) %>%
  filter(X9 == "productive") %>%
  mutate(this_splice_string = paste0(junc_start, "-", junc_end),
         productive_splice_string = paste0(this_splice_string, collapse = ";"),
         just_downstream_splice_string = last(this_splice_string)) %>%
  distinct(Construct, productive_splice_string, just_downstream_splice_string)

# Per well: read-weighted fraction of records whose full junction string equals
# the expected productive junction string for that construct.
df3 <- df2 %>%
  ungroup() %>%
  left_join(
    gtf_productive %>%
      select(Construct, productive_splice_string, just_downstream_splice_string)
  ) %>%
  mutate(
    downstream_is_spliced = str_detect(junctions, just_downstream_splice_string),
    is_productive = productive_splice_string == junctions
  ) %>%
  mutate(
    n_productive = sum(as.numeric(is_productive) * number_of_reads),
    n_total = sum(number_of_reads),
    fraction_productive = n_productive / n_total,
    .by = well
  ) %>%
  # Even reverse barcodes are labelled "Dox", odd ones "NT".
  mutate(
    treatment = factor(
      ifelse(barcode2 %% 2 == 0, "Dox", "NT"),
      levels = c("NT", "Dox")
    )
  ) %>%
  # Collapse to a single row per well.
  select(well, treatment, Construct, fraction_productive) %>%
  distinct()
## Joining with `by = join_by(Construct)`
# Mean and SD of the per-well productive fraction for every
# construct/treatment pair, annotated with the construct's `Order`.
# NOTE(review): if `positions` lists a construct on more than one row, the
# join below repeats the summary rows - confirm this cannot happen.
df3_summary <- df3 %>%
  group_by(Construct, treatment) %>%
  mutate(
    m = mean(fraction_productive),
    sd = sd(fraction_productive)
  ) %>%
  select(Construct, treatment, m, sd) %>%
  distinct() %>%
  left_join(select(positions, Construct, Order)) %>%
  mutate(
    good_name = factor(
      ifelse(Construct == "mScarlet", "+ve", Order),
      levels = c("+ve", "6", "7", "9", "10", "11", "12")
    )
  )
## Joining with `by = join_by(Construct)`
# Bar chart of % productive transcript (mean +/- SD), one facet per `Order`.
# NOTE(review): `good_name` is computed upstream but the facets use `Order` -
# confirm which label is intended.
ggplot(df3_summary, aes(x = treatment, y = 100 * m, fill = treatment)) +
  geom_col() +
  geom_errorbar(
    aes(ymin = 100 * (m - sd), ymax = 100 * (m + sd)),
    position = "dodge",
    width = 0.3
  ) +
  facet_wrap(~Order, scales = "free_y", nrow = 2) +
  ylim(0, NA) +
  labs(x = "", y = "% Productive Transcript") +
  ggpubr::theme_pubr() +
  theme(
    legend.position = "right",
    plot.title = element_text(size = 6),
    text = element_text(size = 10),
    axis.text = element_text(size = 8),
    axis.text.x = element_blank()
  ) +
  ggeasy::easy_remove_legend() +
  ggsci::scale_fill_npg()

ggsave(
  "markdown_images/mScar/96_well_productive_transcript_plot.pdf",
  height = 5, width = 10, units = "cm"
)

# Clear the workspace for the next section, keeping only `data_dir`.
rm(list = setdiff(ls(), "data_dir"))

Representative pileup traces

Let’s also make a pileup of an example mScarlet construct to illustrate that the splicing is as designed by SpliceNouveau.

files3 <- Sys.glob(paste0(data_dir,
                          "Nanopore from first mScarlet mCherry 96 plates/pileup 96 well/pileup/*.csv.gz"))

# Indices of the pileup files for the untreated (f3_r11) and dox-treated
# (f3_r12) samples. These are deliberately not called `nt`/`dox`: the pileup
# CSVs contain a column named `nt`, which would mask a same-named variable
# inside dplyr verbs.
nt_idx <- which(str_detect(files3, "f3_r11"))
dox_idx <- which(str_detect(files3, "f3_r12"))

# Exactly one file must match each sample; anything else means the glob or the
# naming scheme is not what this section expects.
stopifnot(length(nt_idx) == 1, length(dox_idx) == 1)

# Paths named by the sample label used for faceting in the plot.
pileup_paths <- c(NT = files3[nt_idx], shTDP = files3[dox_idx])

# Build one per-position coverage table per sample, then stack them once.
coverage_df <- bind_rows(lapply(names(pileup_paths), function(sample_label) {
  read_csv(pileup_paths[[sample_label]]) %>%
    # Ensure that we only look at the correct alignment: keep the reference
    # with the largest total count.
    group_by(reference_name) %>%
    mutate(n_this_rname = sum(n)) %>%
    ungroup() %>%
    filter(n_this_rname == max(n_this_rname)) %>%
    # Keep whole-number positions only
    # (presumably fractional positions encode insertions - TODO confirm).
    filter(position %% 1 == 0) %>%
    # Coverage per position; rows flagged "del" contribute zero.
    group_by(position) %>%
    summarise(n_at_pos = sum(ifelse(nt == "del", 0, n))) %>%
    mutate(
      sample = sample_label,
      # Coverage relative to the best-covered position in this sample.
      frac = n_at_pos / max(n_at_pos)
    )
}))
## Rows: 57675 Columns: 6
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): nt
## dbl (5): reference_name, position, insertion_number, n, total_reads
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Adding missing grouping variables: `position`
## Rows: 53645 Columns: 6
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): nt
## dbl (5): reference_name, position, insertion_number, n, total_reads
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Adding missing grouping variables: `position`
# Coverage traces, one panel per sample, with all axes and labels stripped.
coverage_df %>%
  ungroup() %>%
  ggplot(aes(x = position, y = n_at_pos)) +
  geom_area() +
  facet_wrap(~sample, ncol = 1, scales = "free_y") +
  theme_void()

# Saved at the default device size.
ggsave("markdown_images/mScar/B11_nano_trace.pdf")
## Saving 7 x 5 in image

Nanopore of AARS1 mCherry

Although the mScarlet experiments above and these mCherry experiments were performed in parallel, the Nanopore sequencing was done separately.

# Read in splice junction CSVs
all_csv <- Sys.glob(paste0(data_dir, 
                                "Nanopore from first mScarlet mCherry 96 plates/extracted splice junctions design1 mcherry/*.gz"))

# Process each file independently and stack the results once at the end,
# instead of growing a data frame inside a loop.
all_df <- bind_rows(lapply(all_csv, function(filename) {
  # The barcode is the last "_"-separated token of the file name, taken before
  # the first ".".
  file_barcode <- as.numeric(
    word(word(word(filename, -1, -1, sep = "/"), 1, 1, sep = "\\."), -1, -1, sep = "_")
  )

  read_csv(filename) %>%
    filter(!str_detect(flag_string, "not primary|supplementary")) %>%
    filter(reference == "r3",  # this is the code given to cryptic mCherry vector
           str_detect(junctions, "-1564"),
           mapping_quality > 50,
           first_pos < 80,
           last_pos > 1650) %>%
    # `frac_productive` is one value per file, repeated on every retained row.
    mutate(
      productive = junctions == "175-339;426-655;1470-1564",
      n_productive = sum(as.numeric(productive) * number_of_reads),
      frac_productive = n_productive / sum(number_of_reads),
      barcode = file_barcode
    )
}))
## Rows: 298 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): reference, flag_string, strand, junctions
## dbl (5): mapping_quality, flag, first_pos, last_pos, number_of_reads
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 185 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): reference, flag_string, strand, junctions
## dbl (5): mapping_quality, flag, first_pos, last_pos, number_of_reads
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 247 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): reference, flag_string, strand, junctions
## dbl (5): mapping_quality, flag, first_pos, last_pos, number_of_reads
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 263 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): reference, flag_string, strand, junctions
## dbl (5): mapping_quality, flag, first_pos, last_pos, number_of_reads
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 246 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): reference, flag_string, strand, junctions
## dbl (5): mapping_quality, flag, first_pos, last_pos, number_of_reads
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 263 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): reference, flag_string, strand, junctions
## dbl (5): mapping_quality, flag, first_pos, last_pos, number_of_reads
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 336 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): reference, flag_string, strand, junctions
## dbl (5): mapping_quality, flag, first_pos, last_pos, number_of_reads
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 183 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): reference, flag_string, strand, junctions
## dbl (5): mapping_quality, flag, first_pos, last_pos, number_of_reads
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 515 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): reference, flag_string, strand, junctions
## dbl (5): mapping_quality, flag, first_pos, last_pos, number_of_reads
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 247 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): reference, flag_string, strand, junctions
## dbl (5): mapping_quality, flag, first_pos, last_pos, number_of_reads
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 269 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): reference, flag_string, strand, junctions
## dbl (5): mapping_quality, flag, first_pos, last_pos, number_of_reads
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 168 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): reference, flag_string, strand, junctions
## dbl (5): mapping_quality, flag, first_pos, last_pos, number_of_reads
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Mean and SD of the productive fraction per construct and treatment.
all_df2 <- all_df %>%
  # `all_df` holds one row per junction record, with the per-barcode
  # `frac_productive` repeated on each of them. Collapse to one row per
  # barcode first, so that every replicate contributes exactly once to the
  # mean and SD instead of being weighted by its number of junction rows.
  distinct(barcode, frac_productive) %>%
  mutate(
    # Even barcodes are labelled shTDP, odd ones NT.
    treatment = ifelse(barcode %% 2 == 0, "shTDP", "NT"),
    # Barcodes 1-6 are labelled cryptic mCherry, higher ones constitutive.
    construct = factor(
      ifelse(barcode <= 6, "Cryptic\nmCherry", "Constitutive\nmCherry"),
      levels = c("Cryptic\nmCherry", "Constitutive\nmCherry")
    )
  ) %>%
  group_by(construct, treatment) %>%
  summarise(
    mean_pc = 100 * mean(frac_productive),
    sd_pc = 100 * sd(frac_productive),
    .groups = "drop"
  )

# Dodged bars (mean +/- SD) of % productive transcript, restricted to the
# cryptic mCherry construct.
all_df2 %>%
  filter(construct == "Cryptic\nmCherry") %>%
  ggplot(aes(x = construct, y = mean_pc, fill = treatment)) +
  geom_col(position = position_dodge(width = 0.9), width = 0.9) +
  geom_errorbar(
    aes(ymin = mean_pc - sd_pc, ymax = mean_pc + sd_pc),
    position = position_dodge(width = 0.9),
    colour = "black",
    width = 0.4
  ) +
  ggpubr::theme_pubclean() +
  ggeasy::easy_add_legend_title("") +
  labs(x = "", y = "% Productive\ntranscript") +
  theme(legend.position = "right", text = element_text(size = 9)) +
  ggsci::scale_fill_npg()

ggsave(
  "markdown_images/mCherry/nanopore analysis design 1 mcherry.pdf",
  height = 5, width = 7, units = "cm"
)

# Clear the workspace for the next section, keeping only `data_dir`.
rm(list = setdiff(ls(), "data_dir"))

Nanopore analysis of Triple-Cryptic-Cre

files <- Sys.glob(paste0(data_dir, 
                                "Nanopore of triple cryptic cre/junction_counts/*.csv.gz"))

# Drop files whose *name* contains "-1" (presumably reads with an unassigned
# barcode - TODO confirm). Matching on the file name only means a "-1"
# elsewhere in the directory path cannot silently empty the list.
files <- files[!str_detect(basename(files), "-1")]

# Annotate each junction-count file with its barcodes and with which cryptic
# exons each junction string contains, then stack all files once.
cryptic_df <- bind_rows(lapply(files, function(file) {
  print(file)

  # File names look like "<bc1>_<bc2>.csv.gz", e.g. "f10_r1.csv.gz".
  sample_id <- word(basename(file), sep = "\\.", 1, 1)
  file_bc1 <- word(sample_id, sep = "_", 1, 1)
  file_bc2 <- word(sample_id, sep = "_", 2, 2)

  read_csv(file) %>%
    filter(reference == "triple_cryptic_cre") %>%
    filter(!str_detect(flag_string, "not primary|supplementary")) %>%
    filter(!is.na(junctions)) %>%
    # A cryptic exon is flagged when both of its flanking junctions appear
    # consecutively in the junction string.
    mutate(
      first_cryptic = str_detect(junctions, "235-399;492-721"),
      second_cryptic = str_detect(junctions, "820-984;1083-1312"),
      third_cryptic = str_detect(junctions, "1589-1753;1862-2091"),
      bc1 = file_bc1,
      bc2 = file_bc2,
      # NOTE(review): digit matching flags r2/r4/r6 as dox; it would
      # misclassify two-digit barcodes such as r10 or r21 - confirm that only
      # r1-r6 are ever used.
      dox = str_detect(bc2, "2|4|6")
    )
}))
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Nanopore of triple cryptic cre/junction_counts/f10_r1.csv.gz"
## Rows: 346 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): reference, flag_string, strand, junctions
## dbl (5): mapping_quality, flag, first_pos, last_pos, number_of_reads
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Nanopore of triple cryptic cre/junction_counts/f10_r2.csv.gz"
## Rows: 552 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): reference, flag_string, strand, junctions
## dbl (5): mapping_quality, flag, first_pos, last_pos, number_of_reads
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Nanopore of triple cryptic cre/junction_counts/f10_r3.csv.gz"
## Rows: 138 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): reference, flag_string, strand, junctions
## dbl (5): mapping_quality, flag, first_pos, last_pos, number_of_reads
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Nanopore of triple cryptic cre/junction_counts/f10_r4.csv.gz"
## Rows: 558 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): reference, flag_string, strand, junctions
## dbl (5): mapping_quality, flag, first_pos, last_pos, number_of_reads
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Nanopore of triple cryptic cre/junction_counts/f10_r5.csv.gz"
## Rows: 201 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): reference, flag_string, strand, junctions
## dbl (5): mapping_quality, flag, first_pos, last_pos, number_of_reads
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Nanopore of triple cryptic cre/junction_counts/f10_r6.csv.gz"
## Rows: 350 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): reference, flag_string, strand, junctions
## dbl (5): mapping_quality, flag, first_pos, last_pos, number_of_reads
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Nanopore of triple cryptic cre/junction_counts/f11_r1.csv.gz"
## Rows: 45 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): reference, flag_string, strand, junctions
## dbl (5): mapping_quality, flag, first_pos, last_pos, number_of_reads
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Nanopore of triple cryptic cre/junction_counts/f11_r2.csv.gz"
## Rows: 75 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): reference, flag_string, strand, junctions
## dbl (5): mapping_quality, flag, first_pos, last_pos, number_of_reads
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Nanopore of triple cryptic cre/junction_counts/f11_r3.csv.gz"
## Rows: 23 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): reference, flag_string, strand, junctions
## dbl (5): mapping_quality, flag, first_pos, last_pos, number_of_reads
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Nanopore of triple cryptic cre/junction_counts/f11_r4.csv.gz"
## Rows: 114 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): reference, flag_string, strand, junctions
## dbl (5): mapping_quality, flag, first_pos, last_pos, number_of_reads
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Nanopore of triple cryptic cre/junction_counts/f11_r5.csv.gz"
## Rows: 45 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): reference, flag_string, strand, junctions
## dbl (5): mapping_quality, flag, first_pos, last_pos, number_of_reads
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Nanopore of triple cryptic cre/junction_counts/f11_r6.csv.gz"
## Rows: 96 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): reference, flag_string, strand, junctions
## dbl (5): mapping_quality, flag, first_pos, last_pos, number_of_reads
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Summarise, for forward barcode f10 only, what percentage of reads include
# 0, 1, 2 or 3 cryptic exons, as mean and SD across the bc2 samples that share
# a dox status. (Files for f11 are loaded above but not used here.)
# NOTE(review): a bc2 sample with no reads for a given exon count contributes
# no row (rather than a 0), so the mean is taken only over samples in which
# that count was observed - confirm this is intended.
df <- cryptic_df %>%
  filter(bc1 == "f10") %>%
  # Encode the inclusion pattern as e.g. "1_0_1" (first, second, third exon).
  mutate(ce_string = paste(as.numeric(first_cryptic), as.numeric(second_cryptic),
                           as.numeric(third_cryptic), sep="_")) %>%
  # Total reads per inclusion pattern within each bc2 sample.
  group_by(ce_string, bc2) %>%
  mutate(n = sum(number_of_reads)) %>%
  ungroup() %>%
  # One row per pattern per sample, then convert to a percentage of the sample.
  group_by(bc2) %>%
  select(bc2, ce_string, n, dox) %>%
  unique() %>%
  mutate(pc = 100*n/sum(n)) %>%
  ungroup() %>%
  select(ce_string, bc2, dox, pc) %>%
  # Number of cryptic exons included = sum of the digits in the pattern.
  group_by(ce_string, bc2) %>%
  mutate(ce_n = sum(as.numeric(str_split(ce_string, pattern = "_", simplify = T)))) %>%
  ungroup() %>%
  # Pool patterns with the same exon count within each sample.
  group_by(bc2, ce_n) %>%
  mutate(total_this_n = sum(pc)) %>%
  select(dox, bc2, ce_n, total_this_n) %>%
  unique() %>%
  ungroup() %>%
  # Mean and SD across samples; sd() is NA for a single sample, so use 0 there.
  group_by(ce_n, dox) %>%
  mutate(mean_pc = mean(total_this_n),
         sd_pc = ifelse(is.na(sd(total_this_n)), 0, sd(total_this_n))) %>%
  select(dox, ce_n, mean_pc, sd_pc) %>%
  unique() %>%
  # Hard-coded zero row for the untreated, three-exon case.
  bind_rows(data.frame(dox = FALSE, ce_n = 3, mean_pc = 0, sd_pc = 0)) # adding so it displays correctly


# Dodged bars (mean +/- SD) of the percentage of reads by number of cryptic
# exons included, coloured by dox status.
ggplot(df, aes(x = ce_n, y = mean_pc, fill = dox)) +
  geom_col(position = "dodge") +
  geom_errorbar(
    aes(x = ce_n, ymin = mean_pc - sd_pc, ymax = mean_pc + sd_pc),
    position = position_dodge(0.8),
    width = 0.4
  ) +
  labs(x = "Number of cryptic exons included", y = "%") +
  ggpubr::theme_pubclean() +
  ggeasy::easy_remove_legend() +
  ggsci::scale_fill_npg()

ggsave(
  "markdown_images/Cre/Cre cryptic plot.pdf",
  height = 3.5, width = 8, units = "cm"
)

# Clear the workspace for the next section, keeping only `data_dir`.
rm(list = setdiff(ls(), "data_dir"))

Heatmap of SpliceNouveau evolution trajectory

This used a special version of SpliceNouveau which saved the sequence and score after every successful iteration

# Bin width used when plotting positions; without downsampling the heatmap is
# difficult to read.
downsample_amount <- 10

# Keep only the attempt(s) that reached the overall highest score, and only
# positions below 1250.
df <- paste0(data_dir, "mScarlet_evolution/mscar_track_all5.csv.tracked_scores.csv.gz") %>%
  read_csv() %>%
  mutate(max_score = max(score), .by = attempt) %>%
  filter(max_score == max(max_score)) %>%
  filter(position < 1250)
## Rows: 393660 Columns: 7
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): sequence
## dbl (6): attempt, iteration, position, donor_prob, acceptor_prob, score
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Give each distinct iteration a consecutive row index `y`, in order of first
# appearance.
y_df <- df %>%
  distinct(iteration) %>%
  mutate(y = row_number())

# For each (position bin, iteration) cell keep the larger of the donor and
# acceptor probabilities seen anywhere in that bin.
df2 <- df %>%
  left_join(y_df) %>%
  select(position, donor_prob, acceptor_prob, y, score) %>%
  mutate(downsampled = downsample_amount * round(position / downsample_amount)) %>%
  pivot_longer(cols = c("donor_prob", "acceptor_prob")) %>%
  mutate(ds_value = max(value), .by = c(downsampled, y)) %>%
  select(downsampled, ds_value, y, score) %>%
  distinct()
## Joining with `by = join_by(iteration)`
# Heatmap: position bin (x) against iteration (y, first iteration at the top),
# coloured by the binned splice-site probability raised to the power 0.35.
p1 <- ggplot(
  df2,
  aes(x = downsampled, y = fct_rev(ordered(y)), fill = (ds_value^0.35))
) +
  geom_raster() +
  scale_fill_viridis_c() +
  ggpubr::theme_classic2() +
  labs(x = "Position in construct", y = "") +
  ggeasy::easy_remove_legend() +
  ggeasy::easy_remove_y_axis() +
  scale_x_discrete(position = "top")

# Side panel: score above the minimum for each iteration, drawn as horizontal
# bars in the same iteration order as the heatmap.
p2 <- ggplot(
  distinct(df2, y, score),
  aes(
    y = (score - min(score)),
    x = fct_rev(ordered(y)),
    fill = log(score - min(score))
  )
) +
  geom_col() +
  coord_flip() +
  scale_fill_viridis_c() +
  ggpubr::theme_classic2() +
  ggeasy::easy_remove_legend() +
  ggeasy::easy_remove_y_axis() +
  ylab("Fitness") +
  scale_y_discrete(position = "right")

# Heatmap and score panel side by side at a 4:1 width ratio.
(p1 | p2) + plot_layout(widths = c(4, 1))

ggsave("markdown_images/mScar/evolution.pdf", height = 10, width = 13, units = "cm")

Nanopore analysis of Gluc splicing

Let’s plot the Nanopore data.

For the supplementary figure we want to plot all five, but for the main figure we plot only the best one.

files2 <- Sys.glob(paste0(data_dir, 
                          "Gluc/junction_counts/*csv.gz"))

files2 <- files2[!str_detect(files2, "no_match")]

# Read every junction-count file, annotate it with the barcodes parsed from
# its name plus per-record junction summaries, and stack the results once
# (rather than growing a data frame inside a loop).
full_junc_df <- bind_rows(lapply(files2, function(file) {
  print(file)

  # File names look like "F<f_bc>_R<r_bc>.csv.gz"; the barcodes are the same
  # for every row of a file, so parse them once here.
  file_name <- word(file, -1, -1, sep = "/")
  file_f_bc <- as.numeric(str_sub(word(file_name, 1, sep = "_"), 2, -1))
  file_r_bc <- as.numeric(
    str_sub(word(word(file_name, 2, sep = "_"), 1, sep = "\\."), 2, -1)
  )

  data.frame(read_csv(file)) %>%
    mutate(
      f_bc = file_f_bc,
      r_bc = file_r_bc,
      # Junction strings are ";"-separated "start-end" pairs.
      last_junction = word(junctions, -1, sep = ";"),
      last_intron_length = as.numeric(word(last_junction, 2, sep = "-")) -
        as.numeric(word(last_junction, 1, sep = "-")),
      first_junction = word(junctions, 1, sep = ";"),
      second_junction = word(junctions, 2, sep = ";"),
      n_introns = str_count(junctions, ";") + 1
    )
}))
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Gluc/junction_counts/F1_R1.csv.gz"
## Rows: 324 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): reference, flag_string, strand, junctions
## dbl (5): mapping_quality, flag, first_pos, last_pos, number_of_reads
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Gluc/junction_counts/F1_R2.csv.gz"
## Rows: 1391 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): reference, flag_string, strand, junctions
## dbl (5): mapping_quality, flag, first_pos, last_pos, number_of_reads
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Gluc/junction_counts/F1_R3.csv.gz"
## Rows: 713 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): reference, flag_string, strand, junctions
## dbl (5): mapping_quality, flag, first_pos, last_pos, number_of_reads
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Gluc/junction_counts/F1_R4.csv.gz"
## Rows: 841 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): reference, flag_string, strand, junctions
## dbl (5): mapping_quality, flag, first_pos, last_pos, number_of_reads
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Gluc/junction_counts/F1_R5.csv.gz"
## Rows: 1545 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): reference, flag_string, strand, junctions
## dbl (5): mapping_quality, flag, first_pos, last_pos, number_of_reads
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Gluc/junction_counts/F1_R6.csv.gz"
## Rows: 1963 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): reference, flag_string, strand, junctions
## dbl (5): mapping_quality, flag, first_pos, last_pos, number_of_reads
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Gluc/junction_counts/F1_R7.csv.gz"
## Rows: 1613 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): reference, flag_string, strand, junctions
## dbl (5): mapping_quality, flag, first_pos, last_pos, number_of_reads
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Gluc/junction_counts/F2_R1.csv.gz"
## Rows: 715 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): reference, flag_string, strand, junctions
## dbl (5): mapping_quality, flag, first_pos, last_pos, number_of_reads
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Gluc/junction_counts/F2_R2.csv.gz"
## Rows: 2862 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): reference, flag_string, strand, junctions
## dbl (5): mapping_quality, flag, first_pos, last_pos, number_of_reads
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Gluc/junction_counts/F2_R3.csv.gz"
## Rows: 2032 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): reference, flag_string, strand, junctions
## dbl (5): mapping_quality, flag, first_pos, last_pos, number_of_reads
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Gluc/junction_counts/F2_R4.csv.gz"
## Rows: 2362 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): reference, flag_string, strand, junctions
## dbl (5): mapping_quality, flag, first_pos, last_pos, number_of_reads
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Gluc/junction_counts/F2_R5.csv.gz"
## Rows: 3566 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): reference, flag_string, strand, junctions
## dbl (5): mapping_quality, flag, first_pos, last_pos, number_of_reads
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Gluc/junction_counts/F2_R6.csv.gz"
## Rows: 4072 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): reference, flag_string, strand, junctions
## dbl (5): mapping_quality, flag, first_pos, last_pos, number_of_reads
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Gluc/junction_counts/F2_R7.csv.gz"
## Rows: 4069 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): reference, flag_string, strand, junctions
## dbl (5): mapping_quality, flag, first_pos, last_pos, number_of_reads
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Gluc/junction_counts/F3_R1.csv.gz"
## Rows: 440 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): reference, flag_string, strand, junctions
## dbl (5): mapping_quality, flag, first_pos, last_pos, number_of_reads
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Gluc/junction_counts/F3_R2.csv.gz"
## Rows: 306 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): reference, flag_string, strand, junctions
## dbl (5): mapping_quality, flag, first_pos, last_pos, number_of_reads
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Gluc/junction_counts/F3_R3.csv.gz"
## Rows: 1305 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): reference, flag_string, strand, junctions
## dbl (5): mapping_quality, flag, first_pos, last_pos, number_of_reads
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Gluc/junction_counts/F3_R4.csv.gz"
## Rows: 1169 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): reference, flag_string, strand, junctions
## dbl (5): mapping_quality, flag, first_pos, last_pos, number_of_reads
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Gluc/junction_counts/F3_R5.csv.gz"
## Rows: 2504 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): reference, flag_string, strand, junctions
## dbl (5): mapping_quality, flag, first_pos, last_pos, number_of_reads
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Gluc/junction_counts/F3_R6.csv.gz"
## Rows: 2953 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): reference, flag_string, strand, junctions
## dbl (5): mapping_quality, flag, first_pos, last_pos, number_of_reads
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Gluc/junction_counts/F3_R7.csv.gz"
## Rows: 2488 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): reference, flag_string, strand, junctions
## dbl (5): mapping_quality, flag, first_pos, last_pos, number_of_reads
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Gluc/junction_counts/F4_R1.csv.gz"
## Rows: 779 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): reference, flag_string, strand, junctions
## dbl (5): mapping_quality, flag, first_pos, last_pos, number_of_reads
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Gluc/junction_counts/F4_R2.csv.gz"
## Rows: 2109 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): reference, flag_string, strand, junctions
## dbl (5): mapping_quality, flag, first_pos, last_pos, number_of_reads
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Gluc/junction_counts/F4_R3.csv.gz"
## Rows: 1083 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): reference, flag_string, strand, junctions
## dbl (5): mapping_quality, flag, first_pos, last_pos, number_of_reads
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Gluc/junction_counts/F4_R4.csv.gz"
## Rows: 2206 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): reference, flag_string, strand, junctions
## dbl (5): mapping_quality, flag, first_pos, last_pos, number_of_reads
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Gluc/junction_counts/F4_R5.csv.gz"
## Rows: 3351 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): reference, flag_string, strand, junctions
## dbl (5): mapping_quality, flag, first_pos, last_pos, number_of_reads
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Gluc/junction_counts/F4_R6.csv.gz"
## Rows: 3962 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): reference, flag_string, strand, junctions
## dbl (5): mapping_quality, flag, first_pos, last_pos, number_of_reads
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Gluc/junction_counts/F4_R7.csv.gz"
## Rows: 3568 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): reference, flag_string, strand, junctions
## dbl (5): mapping_quality, flag, first_pos, last_pos, number_of_reads
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Gluc/junction_counts/F5_R1.csv.gz"
## Rows: 182 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): reference, flag_string, strand, junctions
## dbl (5): mapping_quality, flag, first_pos, last_pos, number_of_reads
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Gluc/junction_counts/F5_R2.csv.gz"
## Rows: 2952 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): reference, flag_string, strand, junctions
## dbl (5): mapping_quality, flag, first_pos, last_pos, number_of_reads
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Gluc/junction_counts/F5_R3.csv.gz"
## Rows: 1878 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): reference, flag_string, strand, junctions
## dbl (5): mapping_quality, flag, first_pos, last_pos, number_of_reads
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Gluc/junction_counts/F5_R4.csv.gz"
## Rows: 1763 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): reference, flag_string, strand, junctions
## dbl (5): mapping_quality, flag, first_pos, last_pos, number_of_reads
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Gluc/junction_counts/F5_R5.csv.gz"
## Rows: 3183 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): reference, flag_string, strand, junctions
## dbl (5): mapping_quality, flag, first_pos, last_pos, number_of_reads
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Gluc/junction_counts/F5_R6.csv.gz"
## Rows: 3607 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): reference, flag_string, strand, junctions
## dbl (5): mapping_quality, flag, first_pos, last_pos, number_of_reads
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Gluc/junction_counts/F5_R7.csv.gz"
## Rows: 3535 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): reference, flag_string, strand, junctions
## dbl (5): mapping_quality, flag, first_pos, last_pos, number_of_reads
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Gluc/junction_counts/F6_R1.csv.gz"
## Rows: 591 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): reference, flag_string, strand, junctions
## dbl (5): mapping_quality, flag, first_pos, last_pos, number_of_reads
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Gluc/junction_counts/F6_R2.csv.gz"
## Rows: 2318 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): reference, flag_string, strand, junctions
## dbl (5): mapping_quality, flag, first_pos, last_pos, number_of_reads
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Gluc/junction_counts/F6_R3.csv.gz"
## Rows: 2128 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): reference, flag_string, strand, junctions
## dbl (5): mapping_quality, flag, first_pos, last_pos, number_of_reads
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Gluc/junction_counts/F6_R4.csv.gz"
## Rows: 2036 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): reference, flag_string, strand, junctions
## dbl (5): mapping_quality, flag, first_pos, last_pos, number_of_reads
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Gluc/junction_counts/F6_R5.csv.gz"
## Rows: 3118 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): reference, flag_string, strand, junctions
## dbl (5): mapping_quality, flag, first_pos, last_pos, number_of_reads
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Gluc/junction_counts/F6_R6.csv.gz"
## Rows: 3352 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): reference, flag_string, strand, junctions
## dbl (5): mapping_quality, flag, first_pos, last_pos, number_of_reads
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Gluc/junction_counts/F6_R7.csv.gz"
## Rows: 4561 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): reference, flag_string, strand, junctions
## dbl (5): mapping_quality, flag, first_pos, last_pos, number_of_reads
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Analyse whether reads primarily align to the construct expected for their
# reverse barcode, and what percentage of those reads is productively spliced.
#
# Input:  full_junc_df - junction count table with barcodes (`f_bc`, `r_bc`),
#         `reference`, `junctions`, `flag_string`, `last_intron_length` and
#         `number_of_reads` (presumably one row per distinct alignment pattern).
# Output: full_junc_df2 - one row per (f_bc, r_bc) pair with `expected_rname`,
#         the read-weighted `perc_productive` and `treatment`; the result is
#         still grouped by f_bc and r_bc.

full_junc_df2 <- full_junc_df %>%
  mutate(
    # Reference each reverse barcode is expected to align to
    expected_rname = case_when(r_bc == 1 ~ "design1_Gluc",
                               r_bc == 2 ~ "Gluc_prepared",
                               r_bc == 3 ~ "Design2_Gluc_A2",
                               r_bc == 4 ~ "Design2_Gluc_A3",
                               r_bc == 5 ~ "Design2_Gluc_A5",
                               r_bc == 6 ~ "Design2_Gluc_A6",
                               r_bc == 7 ~ "Design2_Gluc_A7"),
    # Junction string that counts as productive splicing for that construct
    expected_junctions = case_when(r_bc == 1 ~ "106-270;357-586;1248-1342",
                                   r_bc == 2 ~ "605-699",
                                   r_bc == 3 ~ "252-416;477-706;996-1090",
                                   r_bc == 4 ~ "252-416;477-706;996-1090",
                                   r_bc == 5 ~ "252-416;477-706;996-1090",
                                   r_bc == 6 ~ "252-416;477-715;1005-1099",
                                   r_bc == 7 ~ "252-416;477-715;1005-1099")
  ) %>%
  filter(reference == expected_rname) %>%
  # Two barcode combinations are excluded (reason not recorded in this chunk)
  filter(!(r_bc == 1 & f_bc == 5)) %>%
  filter(!(r_bc == 2 & f_bc == 3)) %>%
  filter(last_intron_length == 94) %>%
  mutate(productively_spliced = junctions == expected_junctions) %>%
  filter(!str_detect(flag_string, "not primary|supplementary")) %>%
  # Aggregate over EVERY remaining alignment row. De-duplicating on
  # (barcodes, spliced flag, read count) before summing would merge distinct
  # alignment patterns that happen to share a read count and so bias the
  # read-weighted percentage.
  group_by(f_bc, r_bc, expected_rname) %>%
  summarise(
    perc_productive = 100 * sum(as.numeric(productively_spliced) * number_of_reads) /
      sum(number_of_reads),
    .groups = "drop_last" # stay grouped by f_bc and r_bc, as before
  ) %>%
  # Even forward barcodes are labelled as the shTDP treatment, odd ones as NT
  mutate(treatment = ifelse(f_bc %% 2 == 0, "shTDP", "NT"))

# Summarise productive splicing per construct and treatment: mean and SD of
# the per-well percentages, plus the log2 ratio of the larger to the smaller
# treatment mean for each construct.
nano_summary <- full_junc_df2 %>%
  ungroup() %>%
  mutate(
    good_name = case_when(
      r_bc == 1 ~ "Upstream",
      r_bc == 2 ~ "Constitutive",
      r_bc == 3 ~ "TDP-REGv2\n#1",
      r_bc == 4 ~ "TDP-REGv2\n#2",
      r_bc == 5 ~ "TDP-REGv2\n#3",
      r_bc == 6 ~ "TDP-REGv2\n#4",
      r_bc == 7 ~ "TDP-REGv2\n#5"
    ),
    # Names used in the paper differ from good_name only for barcodes 1 and 2
    paper_name = case_when(
      r_bc == 1 ~ "TDP-REGv1",
      r_bc == 2 ~ "+ve",
      TRUE ~ good_name
    )
  ) %>%
  group_by(treatment, good_name) %>%
  mutate(
    mean_perc = mean(perc_productive),
    sd_perc = sd(perc_productive)
  ) %>%
  # Grouping columns are re-added automatically by select()
  select(mean_perc, sd_perc, paper_name) %>%
  distinct() %>%
  # group_by() replaces the previous grouping, so no ungroup() is needed first
  group_by(good_name) %>%
  mutate(log2fc = log2(max(mean_perc) / min(mean_perc)))
## Adding missing grouping variables: `treatment`, `good_name`
# Dodged bar chart of mean % productively spliced (error bars: mean +/- SD)
# for every construct, coloured by treatment, then saved as a PDF.
nano_summary %>%
  ggplot(aes(x = paper_name, y = mean_perc, fill = treatment)) +
  geom_col(position = "dodge") +
  geom_errorbar(
    aes(ymin = mean_perc - sd_perc, ymax = mean_perc + sd_perc),
    position = "dodge"
  ) +
  labs(x = "", y = "% productively\nspliced") +
  theme_classic() +
  ggeasy::easy_add_legend_title("Treatment") +
  ggsci::scale_fill_npg()

# ggsave() writes the most recently created plot
ggsave(
  "markdown_images/gluc/all_gluc_nanopore_results.pdf",
  width = 19, height = 9, units = "cm"
)

# Same bar chart restricted to the two constructs shown in the combined
# figure; stored rather than printed.
nano_plot <- nano_summary %>%
  filter(paper_name %in% c("TDP-REGv1", "TDP-REGv2\n#5")) %>%
  ggplot(aes(x = paper_name, y = mean_perc, fill = treatment)) +
  geom_col(position = "dodge") +
  geom_errorbar(
    aes(ymin = mean_perc - sd_perc, ymax = mean_perc + sd_perc),
    position = "dodge"
  ) +
  labs(x = "", y = "% productively\nspliced") +
  theme_classic() +
  ggeasy::easy_add_legend_title("Treatment") +
  ggsci::scale_fill_npg()

Now let’s plot the plate-reader luminescence for the relevant constructs and combine it with the Nanopore plot

# Load the plate-reader export (one CSV row per plate row, one column per
# plate column), reshape it to one row per well and attach the per-row,
# per-treatment mean and SD of the signal.
df <- paste0(data_dir, "Gluc/oscargluc 23 june 2022.csv") %>%
  read_csv() %>%
  pivot_longer(cols = all_of(as.character(1:12))) %>%
  mutate(column = as.numeric(name)) %>%
  select(-name) %>%
  filter(
    column < 7,
    !(column == 3 & row == "B") # remove bad well
  ) %>%
  mutate(
    # Even plate columns are labelled shTDP, odd ones NT
    treatment = factor(
      ifelse(column %% 2 == 0, "shTDP", "NT"),
      levels = c("NT", "shTDP")
    )
  ) %>%
  group_by(treatment, row) %>%
  mutate(
    av = mean(value),
    sd = sd(value)
  )
## Rows: 8 Columns: 13
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (1): row
## dbl (12): 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Lookup table from plate row (A-H) to the construct's short name and its
# display name; the factor levels fix the display order.
name_df <- data.frame(
  row = LETTERS[1:8],
  names = c("Design1", "Positive", "A2", "A3", "A5", "A6", "A7", "mScarlet"),
  clear_name = factor(
    c(
      "TDP-REGv1", "Constitutive",
      "TDP-REGv2\n#1", "TDP-REGv2\n#2", "TDP-REGv2\n#3",
      "TDP-REGv2\n#4", "TDP-REGv2\n#5",
      "mScarlet"
    ),
    levels = c(
      "Constitutive", "mScarlet", "TDP-REGv1",
      "TDP-REGv2\n#1", "TDP-REGv2\n#2", "TDP-REGv2\n#3",
      "TDP-REGv2\n#4", "TDP-REGv2\n#5"
    )
  )
)

# One row per construct and treatment with the mean/SD luminescence, plus the
# log2 fold change of the shTDP mean over the NT mean for each construct.
summary <- df %>%
  ungroup() %>%
  left_join(name_df) %>%
  select(clear_name, treatment, av, sd) %>%
  distinct() %>%
  group_by(clear_name) %>%
  mutate(
    # Within a construct, pick out each treatment's mean (other rows give 0)
    shTDP_av = max(if_else(treatment == "shTDP", av, 0)),
    nt_av = max(if_else(treatment == "NT", av, 0)),
    log2fc = log2(shTDP_av / nt_av)
  )
## Joining with `by = join_by(row)`
# Alternative display labels for the four constructs kept for the figure.
# clear_name3 is a factor whose levels set the x-axis order; the "" level
# marks the construct that is filtered out before plotting.
nice_names <- data.frame(
  clear_name = c("TDP-REGv2\n#3", "TDP-REGv2\n#5", "mScarlet", "TDP-REGv1"),
  clear_name2 = c("TDP-REGv2\n# 1", "TDP-REGv2\n# 2", "Negative", "AARS1-\nbased"),
  clear_name3 = factor(
    c("", "TDP-REGv2\n#5", "Negative", "TDP-REGv1"),
    levels = c("TDP-REGv1", "TDP-REGv2\n#5", "Negative", "")
  )
)

# Keep the four constructs of interest and attach their alternative labels
summary3 <- summary %>%
  filter(
    clear_name %in% c("TDP-REGv2\n#3", "TDP-REGv2\n#5", "mScarlet", "TDP-REGv1")
  ) %>%
  left_join(nice_names)
## Joining with `by = join_by(clear_name)`
# Luminescence bar chart (mean +/- SD, in units of 10,000) for the constructs
# that have a non-empty clear_name3 label, coloured by treatment.
# Optional layers tried previously (log2fc text labels, a title, rotated
# x labels) are left out.
p1 <- summary3 %>%
  filter(clear_name3 != "") %>%
  ggplot(aes(x = clear_name3, y = av / 10000, fill = treatment)) +
  geom_col(position = "dodge") +
  geom_errorbar(
    aes(ymin = (av - sd) / 10000, ymax = (av + sd) / 10000),
    position = "dodge"
  ) +
  labs(x = "", y = "Luminescence\nx10,000") +
  ggpubr::theme_classic2() +
  ggeasy::easy_add_legend_title("Treatment") +
  ggsci::scale_fill_npg()
p1

# Two-panel figure: Nanopore panel (legend removed) on the left and the
# luminescence panel on the right, with the right panel 1.6x as wide.
((nano_plot + ggeasy::easy_remove_legend()) | p1) +
  plot_layout(widths = c(1, 1.6))

ggsave(
  "markdown_images/gluc/combined nanopore and luciferase.pdf",
  height = 7, width = 19, units = "cm"
)

Produce some representative pileups

This is just to show that the splicing is exactly as predicted

# Build per-position coverage for two representative pileups (one NT, one
# shTDP) so the splicing pattern can be plotted.
#
# Reads:   every *.csv.gz under <data_dir>/Gluc/pileup/
# Creates: files3 (all pileup paths), nt / dox (indices of the two samples in
#          files3) and coverage_df (columns position, n_at_pos, sample, frac).
files3 <- Sys.glob(paste0(data_dir,
                          "Gluc/pileup/*.csv.gz"))

# Get index of files for cols 5 and 6 in the plate for the best internal cryptic
nt <- which(str_detect(files3, "F5_R7"))
dox <- which(str_detect(files3, "F6_R7"))

# Fail with a clear message instead of a cryptic `if` error when a sample is
# missing or matched more than once.
if (length(nt) != 1 || length(dox) != 1) {
  stop("Expected exactly one F5_R7 and one F6_R7 pileup file in ",
       data_dir, "Gluc/pileup/", call. = FALSE)
}

pileup_files <- c(NT = files3[nt], shTDP = files3[dox])

coverage_df <- bind_rows(lapply(names(pileup_files), function(sample_label) {
  read_csv(pileup_files[[sample_label]]) %>%
    filter(insertion_number == 0) %>%
    # Ensure that we only look at the reference with the most counts
    group_by(reference_name) %>%
    mutate(n_this_rname = sum(n)) %>%
    ungroup() %>%
    filter(n_this_rname == max(n_this_rname)) %>%
    filter(position %% 1 == 0) %>%
    group_by(position) %>%
    # `.data$nt` is the pileup's nucleotide column, not the global index `nt`;
    # deletions contribute no coverage.
    mutate(score = ifelse(.data$nt == "del", 0, n)) %>%
    mutate(n_at_pos = sum(score)) %>%
    # One row per position (first occurrence kept, so row order is unchanged)
    distinct(position, n_at_pos) %>%
    mutate(sample = sample_label) %>%
    ungroup() %>%
    # Coverage relative to the best-covered position in this sample
    mutate(frac = n_at_pos / max(n_at_pos))
}))
## Rows: 25482 Columns: 6
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): nt
## dbl (5): reference_name, position, insertion_number, n, total_reads
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Adding missing grouping variables: `position`
## Rows: 33716 Columns: 6
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): nt
## dbl (5): reference_name, position, insertion_number, n, total_reads
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Adding missing grouping variables: `position`
# Area plot of per-position coverage, one facet per sample stacked
# vertically with independent y axes; saved at the current device size.
coverage_df %>%
  ungroup() %>%
  ggplot(aes(x = position, y = n_at_pos)) +
  geom_area() +
  facet_wrap(vars(sample), ncol = 1, scales = "free_y") +
  theme_void() +
  ggsci::scale_fill_npg()

ggsave("markdown_images/gluc/gluc_internal_5_pileups.pdf")
## Saving 7 x 5 in image
# Clear the workspace before the next analysis, keeping only `data_dir`
rm(list = ls()[ls() != "data_dir"])

AAV quantification

We transduced mice with an AAV expressing mCherry. The images were manually quantified (with blinding)

# Per-animal summary of the manual counts for the mCherry AAV cohort.
# Counts from the CSV rows (presumably one row per section) are summed within
# each animal (`id`); per-animal fractions and mutually exclusive count
# categories are then derived, genotype is assigned from a fixed id list, and
# animals are numbered for plotting.
quants <- paste0(data_dir, "AAV/quantifications_from_jo_fixed.csv") %>%
  read_csv() %>%
  filter(!is.na(triple)) %>%
  # The CSV header spells this column "tdp_and_vacth"
  dplyr::rename(tdp_and_vacht = tdp_and_vacth) %>%
  mutate(
    vacht = as.numeric(vacht), # read in as character
    # Compute some useful values
    n_mcherry_no_vacht = mcherry - vacht_and_mcherry,
    n_vacht_no_tdp = vacht - tdp_and_vacht
  ) %>%
  # Compute summaries per animal
  group_by(id) %>%
  mutate(
    total_n_mcherry = sum(mcherry),
    total_n_vacht_and_mcherry = sum(vacht_and_mcherry),
    total_n_vacht = sum(vacht),
    total_n_vacht_and_tdp = sum(tdp_and_vacht),
    total_n_mcherry_no_vacht = sum(n_mcherry_no_vacht),
    total_n_vacht_no_tdp = sum(n_vacht_no_tdp),
    total_n_triple = sum(triple),
    total_n_tdp = sum(vacht - n_vacht_no_tdp)
  ) %>%
  # Collapse to one row per animal
  select(id, contains("total")) %>%
  distinct() %>%
  mutate(
    # Question 1: fraction of vacht counts that also have TDP
    frac_vacht_with_TDP = total_n_vacht_and_tdp / total_n_vacht,
    # Question 2: fraction of vacht counts that also have mCherry
    frac_vacht_with_mcherry = total_n_vacht_and_mcherry / total_n_vacht,
    # Question 4: split the vacht counts into four exclusive categories
    n_vacht_mcherry_and_tdp = total_n_triple,
    n_vacht_mcherry_no_tdp = total_n_vacht_and_mcherry - total_n_triple,
    n_vacht_tdp_no_mcherry = total_n_vacht_and_tdp - total_n_triple,
    n_vacht_no_mcherry_no_tdp = total_n_vacht - n_vacht_mcherry_and_tdp -
      n_vacht_mcherry_no_tdp - n_vacht_tdp_no_mcherry,
    genotype = ifelse(id %in% c(29, 30, 31, 40, 206), "cKO", "Control")
  ) %>%
  # Add entry for cKO in which no fluorescence was seen
  bind_rows(data.frame(id = -1, genotype = "cKO")) %>%
  ungroup() %>%
  # Control animals first, then cKO; label is the plotting position
  arrange(desc(genotype)) %>%
  mutate(label = row_number())
## Rows: 444 Columns: 7
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): section, vacht
## dbl (5): id, mcherry, vacht_and_mcherry, tdp_and_vacth, triple
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Replace every missing value with 0 (this fills the placeholder animal's
# empty columns), then keep a copy of this cohort's summary.
quants <- replace(quants, is.na(quants), 0)

quants_a244 <- quants

# Bar chart of the percentage of MNs with mCherry for each animal, faceted by
# genotype (Control first) with independent x axes so each facet only shows
# its own animals; saved as a PDF.
ggplot(quants, aes(x = factor(label), y = 100 * frac_vacht_with_mcherry,
                   fill = genotype)) +
  geom_bar(stat = "identity") +
  # `scales` is spelled out in full rather than relying on partial argument
  # matching of `scale`.
  facet_wrap(~factor(genotype, levels = c("Control", "cKO")),
             scales = "free_x") +
  ylab("% MNs with mCherry") +
  theme_classic() +
  ggsci::scale_fill_npg() +
  xlab("Animal number") +
  ggeasy::easy_add_legend_title("Genotype")

ggsave("markdown_images/AAV/pc_mCherry_MNs.pdf", height = 4.5, width = 8, units = "cm")

# ggplot(quants, aes(x = factor(label), y = 100*frac_vacht_with_TDP,
#                    fill = genotype)) +
#   geom_bar(stat="identity") +
#   facet_wrap(~factor(genotype, levels = c("Control", "cKO")), 
#                      scale = "free_x") +
#   ylab("% MNs with TDP-43") +
#   theme_classic() +
#   ggsci::scale_fill_npg() +
#   xlab("Animal number") +
#   ggeasy::easy_add_legend_title("Genotype")
# 
# ggsave("markdown_images/AAV/pc_TDP43_MNs.pdf", height = 4.5, width = 8, units="cm")

Now let’s do the same but for the internal cryptic mScarlet

# Per-animal summary of the manual counts for the a241 cohort, computed the
# same way as for the mCherry cohort. This CSV has no separate `mcherry`
# column, so it is set equal to `vacht_and_mcherry`.
quants <- paste0(data_dir, "AAV/a241 quantifications.csv") %>%
  read_csv() %>%
  filter(!is.na(triple)) %>%
  mutate(
    mcherry = vacht_and_mcherry,
    vacht = as.numeric(vacht),
    # Compute some useful values
    n_mcherry_no_vacht = mcherry - vacht_and_mcherry,
    n_vacht_no_tdp = vacht - tdp_and_vacht
  ) %>%
  # Compute summaries per animal
  group_by(id) %>%
  mutate(
    total_n_mcherry = sum(mcherry),
    total_n_vacht_and_mcherry = sum(vacht_and_mcherry),
    total_n_vacht = sum(vacht),
    total_n_vacht_and_tdp = sum(tdp_and_vacht),
    total_n_mcherry_no_vacht = sum(n_mcherry_no_vacht),
    total_n_vacht_no_tdp = sum(n_vacht_no_tdp),
    total_n_triple = sum(triple),
    total_n_tdp = sum(vacht - n_vacht_no_tdp)
  ) %>%
  # Collapse to one row per animal
  select(id, contains("total")) %>%
  distinct() %>%
  mutate(
    # Question 1: fraction of vacht counts that also have TDP
    frac_vacht_with_TDP = total_n_vacht_and_tdp / total_n_vacht,
    # Question 2: fraction of vacht counts that also have mCherry
    frac_vacht_with_mcherry = total_n_vacht_and_mcherry / total_n_vacht,
    # Question 4: split the vacht counts into four exclusive categories
    n_vacht_mcherry_and_tdp = total_n_triple,
    n_vacht_mcherry_no_tdp = total_n_vacht_and_mcherry - total_n_triple,
    n_vacht_tdp_no_mcherry = total_n_vacht_and_tdp - total_n_triple,
    n_vacht_no_mcherry_no_tdp = total_n_vacht - n_vacht_mcherry_and_tdp -
      n_vacht_mcherry_no_tdp - n_vacht_tdp_no_mcherry,
    genotype = ifelse(id %in% c(254, 256, 257, 283, 284), "cKO", "Control")
  ) %>%
  # Add entry for cKO in which no fluorescence was seen
  bind_rows(data.frame(id = 255, genotype = "cKO")) %>%
  ungroup() %>%
  # Control animals first, then cKO; label is the plotting position
  arrange(desc(genotype)) %>%
  mutate(label = row_number())
## New names:
## Rows: 93 Columns: 10
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "," chr
## (1): section dbl (7): id, vacht, vacht_and_mcherry, tdp_and_vacht, triple, cKO,
## WT lgl (2): ...7, ...8
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## • `` -> `...7`
## • `` -> `...8`
# The placeholder cKO row (id 255) has NA in every summary column; count
# those as zero so the animal still appears in the plots.
quants[is.na(quants)] <- 0

# Keep a copy under a vector-specific name for the combined plot further down.
quants_a241 <- quants


# One bar per animal: percentage of VAChT+ cells that are reporter+.
# The reporter column is still called `frac_vacht_with_mcherry`, but this
# vector encodes mScarlet, hence the axis label.
ggplot(quants, aes(x = factor(label), y = 100 * frac_vacht_with_mcherry,
                   fill = genotype)) +
  geom_bar(stat = "identity") +
  facet_wrap(~factor(genotype, levels = c("Control", "cKO")),
             scales = "free_x") +
  ylab("% MNs with mScarlet") +
  theme_classic() +
  ggsci::scale_fill_npg() +
  xlab("Animal number") +
  ggeasy::easy_add_legend_title("Genotype")

# Written to its own file: the a244 plot above is saved as
# pc_mCherry_MNs.pdf, and reusing that name here would overwrite it.
ggsave("markdown_images/AAV/pc_mScarlet_MNs.pdf", height = 4.5, width = 8, units = "cm")

# ggplot(quants, aes(x = factor(label), y = 100*frac_vacht_with_TDP,
#                    fill = genotype)) +
#   geom_bar(stat="identity") +
#   facet_wrap(~factor(genotype, levels = c("Control", "cKO")), 
#                      scale = "free_x") +
#   ylab("% MNs with TDP-43") +
#   theme_classic() +
#   ggsci::scale_fill_npg() +
#   xlab("Animal number") +
#   ggeasy::easy_add_legend_title("Genotype")

And now the same for the positive control vector

# Per-animal motor-neuron counts for the positive-control vector (A245).
# Same summary as for the other vectors, keyed on the sheet's `Aminal_ID`
# column; every animal in this sheet is labelled as a control.
quants <- paste0(data_dir, "AAV/A245_quantifications3.csv") %>%
  read_csv() %>%
  filter(!is.na(triple)) %>%
  mutate(
    # `mcherry` is a copy of `vacht_and_mcherry` here, so
    # `n_mcherry_no_vacht` below is always zero.
    mcherry = vacht_and_mcherry,
    vacht = as.numeric(vacht),
    n_mcherry_no_vacht = mcherry - vacht_and_mcherry,
    n_vacht_no_tdp = vacht - tdp_and_vacht
  ) %>%
  # Totals per animal; repeated on every section row, collapsed by distinct()
  group_by(Aminal_ID) %>%
  mutate(
    total_n_mcherry = sum(mcherry),
    total_n_vacht_and_mcherry = sum(vacht_and_mcherry),
    total_n_vacht = sum(vacht),
    total_n_vacht_and_tdp = sum(tdp_and_vacht),
    total_n_mcherry_no_vacht = sum(n_mcherry_no_vacht),
    total_n_vacht_no_tdp = sum(n_vacht_no_tdp),
    total_n_triple = sum(triple),
    total_n_tdp = sum(vacht - n_vacht_no_tdp)
  ) %>%
  select(Aminal_ID, contains("total")) %>%
  distinct() %>%
  mutate(
    # Fraction of VAChT+ cells that are TDP-43+
    frac_vacht_with_TDP = total_n_vacht_and_tdp / total_n_vacht,
    # Fraction of VAChT+ cells that are reporter+
    frac_vacht_with_mcherry = total_n_vacht_and_mcherry / total_n_vacht,
    # Split the VAChT+ cells into the four reporter x TDP-43 classes
    n_vacht_mcherry_and_tdp = total_n_triple,
    n_vacht_mcherry_no_tdp = total_n_vacht_and_mcherry - total_n_triple,
    n_vacht_tdp_no_mcherry = total_n_vacht_and_tdp - total_n_triple,
    n_vacht_no_mcherry_no_tdp = total_n_vacht - n_vacht_mcherry_and_tdp -
      n_vacht_mcherry_no_tdp - n_vacht_tdp_no_mcherry,
    genotype = "Control"
  ) %>%
  ungroup() %>%
  arrange(desc(genotype)) %>%
  # `label` is the plotting position of each animal
  mutate(label = 1:n())
## Rows: 40 Columns: 6
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): Section
## dbl (5): Aminal_ID, vacht, vacht_and_mcherry, tdp_and_vacht, triple
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Keep a copy under a vector-specific name for the combined plot further down.
quants_a245 <- quants


# One bar per animal: percentage of VAChT+ cells that are reporter+.
ggplot(
  quants,
  aes(
    x = factor(label),
    y = 100 * frac_vacht_with_mcherry,
    fill = genotype
  )
) +
  geom_bar(stat = "identity") +
  facet_wrap(
    ~ factor(genotype, levels = c("Control", "cKO")),
    scales = "free_x"
  ) +
  theme_classic() +
  ggsci::scale_fill_npg() +
  xlab("Animal number") +
  ylab("% MNs with mCherry") +
  ggeasy::easy_add_legend_title("Genotype")

#ggsave("markdown_images/AAV/pc_mCherry_MNs.pdf", height = 4.5, width = 8, units="cm")

# Same layout, but showing the percentage of VAChT+ cells that are TDP-43+.
ggplot(
  quants,
  aes(
    x = factor(label),
    y = 100 * frac_vacht_with_TDP,
    fill = genotype
  )
) +
  geom_bar(stat = "identity") +
  facet_wrap(
    ~ factor(genotype, levels = c("Control", "cKO")),
    scales = "free_x"
  ) +
  theme_classic() +
  ggsci::scale_fill_npg() +
  xlab("Animal number") +
  ylab("% MNs with TDP-43") +
  ggeasy::easy_add_legend_title("Genotype")

Now let’s combine the quantifications for all three vectors

# Stack the three per-animal tables, tagging each with its vector name.
# Both `vector` and `genotype` become factors so that panels and x-axis
# categories appear in a fixed order.
combined_quants <- bind_rows(
  mutate(quants_a244, vector = "TDP-REGv1\nmCherry"),
  mutate(quants_a241, vector = "TDP-REGv2\nmScarlet"),
  mutate(quants_a245, vector = "+ve\nmScarlet")
) %>%
  mutate(
    vector = factor(
      vector,
      levels = c("TDP-REGv1\nmCherry", "TDP-REGv2\nmScarlet", "+ve\nmScarlet")
    ),
    genotype = factor(genotype, levels = c("Control", "cKO"))
  )

# Dot plot of the per-animal reporter-positive percentage, one panel per
# vector, with genotype on the x-axis.
ggplot(
  combined_quants,
  aes(
    x = genotype,
    y = 100 * frac_vacht_with_mcherry,
    fill = genotype
  )
) +
  geom_dotplot(
    binaxis = "y",
    stackdir = "center",
    binwidth = 4,
    dotsize = 1.1
  ) +
  facet_wrap(~vector, scales = "free_x") +
  ggpubr::theme_classic2() +
  ggsci::scale_fill_npg() +
  xlab("") +
  ylab("% MNs RFP +ve") +
  ggeasy::easy_add_legend_title("Genotype")

# Beeswarm version of the combined plot: one point per animal, one panel per
# vector. The points are mapped to `colour`, so the NPG palette has to be
# applied with the colour scale (a fill scale has no effect on this plot).
ggplot(combined_quants, aes(x = genotype, y = 100 * frac_vacht_with_mcherry,
                            colour = genotype)) +

  #geom_quasirandom(varwidth = TRUE) +
  geom_beeswarm(cex = 4, method = "compactswarm", size = 1.8, alpha = 0.8) +
  facet_wrap(~vector, scales = "free_x") +
  ylab("% MNs RFP +ve") +
  ggpubr::theme_classic2() +
  xlab("") +
  ggeasy::easy_add_legend_title("Genotype") +
  ggsci::scale_color_npg()

ggsave("markdown_images/AAV/pc_TDP43_MNs_combined_quants_incl_a245.pdf", height = 5.5, width = 12, units = "cm")


# ggplot(combined_quants, aes(x = genotype, y = 100*frac_vacht_with_TDP, 
#                             fill = genotype)) +
#   geom_dotplot(binaxis = "y", stackdir = "center") +
#   facet_wrap(~factor(genotype, levels = c("Control", "cKO")), 
#                      scale = "free_x") +
#   ylab("% MNs with TDP-43") +
#   theme_classic() +
#   ggsci::scale_fill_npg() +
#   xlab("Animal number") +
#   ggeasy::easy_add_legend_title("Genotype") +
#   facet_wrap(~vector)

Incucyte with 12QN TDP-43

We transfected various mScarlet reporters into HEK293T cells with either SNAP-12QN-TDP-43 or just a plain Halo tag not attached to anything. We added a little bit of mGreenLantern plasmid to all of them too, just to check that the transfection worked. Note that the first two rows went a little bit wrong during transfection - volume was lost, so the last couple didn’t work.

Also, it seems like the Design 1 AARS1-mCherry transfection failed. The plasmid had been stored at 4 degrees (instead of frozen) for an extended period so I assume this is why. I saw similar problems in another transfection.

We’ll ignore this row.

# Build a montage of Incucyte images: one tile per well, each tile a 2 x 2
# grid of that well's images.
#
# NOTE: a function of the same name is also defined earlier in this document;
# this later definition is the one in effect from here on.
#
# Arguments:
#   combined_positions  Data frame with columns `Plate`, `well` and `Order`
#                       (`Order` is the montage row the well is drawn in).
#                       If it also has `replicate` and `filename` columns the
#                       replicate number is looked up by file name; otherwise
#                       it is taken from the third "_"-separated field of the
#                       file name.
#   all_images          Character vector of image paths. The well is the second
#                       "_"-separated field of the file name; paths containing
#                       "control plate" are treated as plate 2, all others as
#                       plate 1.
#   n_rows, n_columns   Number of well tiles down / across the montage.
#   dim_image           Side length each image is resized to.
#   spacing_images      Gap between the images of one well.
#   spacing_wells       Gap between neighbouring wells.
#   background_quantile Quantile of the background-subtracted montage used as
#                       the scaling value before inversion.
#
# Returns an EBImage `Image` holding the scaled, inverted montage.
#
# Only four images per well are supported (the "replicate" number is really
# the image index within the well): replicates 2 and 4 go in the right half
# of the tile, replicates 3 and 4 in the lower half.
generate_full_image <- function(combined_positions, all_images, n_rows,
                                n_columns = 6, dim_image = 200,
                                spacing_images = 5, spacing_wells = 30,
                                background_quantile = 0.97) {
  well_size <- 2 * dim_image + spacing_images
  well_stride <- well_size + spacing_wells

  height <- n_rows * well_size + (n_rows - 1) * spacing_wells + 1
  width <- n_columns * well_size + (n_columns - 1) * spacing_wells + 1

  # Montage canvas, indexed [y, x]; it is transposed when the Image is built.
  full_image <- matrix(0, nrow = height, ncol = width)

  background <- 10000000 # set arbitrarily large initial value

  for (image in all_images) {
    filename <- word(image, sep = "/", start = -1, end = -1)
    plate <- paste(ifelse(str_detect(image, "control plate"), 2, 1))

    well <- word(filename, sep = "_", start = 2, end = 2)
    plate_column <- as.numeric(str_sub(well, 2, 3))

    # Plate columns wrap every six (1-6 and 7-12 both map to montage columns
    # 1-6); this is hard-coded and independent of `n_columns`.
    image_column <- ifelse(plate_column %% 6 == 0, 6, plate_column %% 6)
    image_row <- unique(combined_positions$Order[
      which(combined_positions$well == well &
              combined_positions$Plate == plate)
    ])

    if (length(image_row) > 1) {
      stop("More than one `Order` value for well ", well, " on plate ", plate,
           call. = FALSE)
    }

    # Skip images whose well is not listed in `combined_positions` (the lookup
    # is then zero-length, which a bare is.na() test cannot handle) or whose
    # row is NA.
    if (length(image_row) == 0 || is.na(image_row)) {
      next
    }

    if ("replicate" %in% colnames(combined_positions)) {
      replicate <- combined_positions$replicate[
        which(combined_positions$filename == filename)
      ]
      if (length(replicate) == 0) {
        stop("No `replicate` entry in combined_positions for file ", filename,
             call. = FALSE)
      }
    } else {
      replicate <- as.numeric(word(filename, sep = "_", start = 3, end = 3))
    }

    image_matrix <- readImage(image)
    smaller <- as.array(EBImage::resize(image_matrix, w = dim_image, h = dim_image))

    # Top-left corner of the well tile, then of this image within the tile.
    well_top_left_x <- (image_column - 1) * well_stride + 1
    image_top_left_x <- well_top_left_x +
      ifelse(replicate %in% c(2, 4), dim_image + spacing_images, 0)

    well_top_left_y <- (image_row - 1) * well_stride + 1
    image_top_left_y <- well_top_left_y +
      ifelse(replicate %in% c(3, 4), dim_image + spacing_images, 0)

    full_image[image_top_left_y:(image_top_left_y + dim_image - 1),
               image_top_left_x:(image_top_left_x + dim_image - 1)] <- smaller

    # The montage background is the lowest 5th-percentile intensity seen in
    # any single image.
    this_background <- quantile(smaller, 0.05)

    if (this_background < background) {
      background <- this_background
    }
  }

  # Subtract the background, scale by the chosen quantile and invert.
  full_image2 <- full_image - background
  img_invert <- Image(1 - t(full_image2 / quantile(full_image2, background_quantile)))

  img_invert
}

# Red-channel images for the 12QN experiment.
red_12qn <- Sys.glob(paste0(data_dir, "12qn from incucyte/red/*"))

# Keep only the images whose path contains "VID411_D" (the row-D wells).
just_a11 <- red_12qn[which(str_detect(red_12qn, "VID411_D"))]


# Layout table for generate_full_image(): one row per red-channel image with
# its well, image number within the well, plate and montage row.
positions <- data.frame(filename = word(red_12qn, -1, sep = "/")) %>%
  mutate(well = word(filename, 2, sep = "_"),
         replicate = as.numeric(word(filename, 3, sep = "_"))) %>%
  mutate(row = str_sub(well, 1, 1),
         # Characters 2-3, so two-digit plate columns (10-12) are not cut
         # down to their first digit.
         column = as.numeric(str_sub(well, 2, 3))) %>%
  left_join(data.frame(row = c("A", "B", "C", "D", "E", "F", "G", "H"),
                       Order = c(1, 2, 3, 4, 5, 6, 7, 8))) %>%
  mutate(plate_column = column) %>%
  mutate(Plate = 1) %>%
  filter(row == "D") %>%
  # Only row D is drawn, so it goes in the first (and only) montage row.
  mutate(Order = 1) %>%
  # Swap image numbers 2 and 3, which exchanges the top-right and bottom-left
  # slots that generate_full_image() assigns within each well tile.
  mutate(replicate = case_when(replicate == 2 ~ 3,
                               replicate == 3 ~ 2,
                               TRUE ~ replicate))
## Joining with `by = join_by(row)`
# Red-channel montage of the row-D wells.
im <- generate_full_image(positions, just_a11, n_rows = 1, background_quantile = 0.995)

# Green-channel images for the same experiment.
green_12qn <- Sys.glob(paste0(data_dir, "12qn from incucyte/green/*"))

# Select the row-D images by matching the green paths themselves. Matching on
# the red file list only gives the right files if both folders happen to
# contain the same images in the same order.
just_a11_green <- green_12qn[which(str_detect(green_12qn, "VID411_D"))]

# Layout table for generate_full_image(): one row per green-channel image
# with its well, image number within the well, plate and montage row.
positions_green <- data.frame(filename = word(green_12qn, -1, sep = "/")) %>%
  mutate(well = word(filename, 2, sep = "_"),
         replicate = as.numeric(word(filename, 3, sep = "_"))) %>%
  mutate(row = str_sub(well, 1, 1),
         # Characters 2-3, so two-digit plate columns (10-12) are not cut
         # down to their first digit.
         column = as.numeric(str_sub(well, 2, 3))) %>%
  left_join(data.frame(row = c("A", "B", "C", "D", "E", "F", "G", "H"),
                       Order = c(1, 2, 3, 4, 5, 6, 7, 8))) %>%
  mutate(plate_column = column) %>%
  mutate(Plate = 1) %>%
  filter(row == "D") %>%
  # Only row D is drawn, so it goes in the first (and only) montage row.
  mutate(Order = 1) %>%
  # Swap image numbers 2 and 3, which exchanges the top-right and bottom-left
  # slots that generate_full_image() assigns within each well tile.
  mutate(replicate = case_when(replicate == 2 ~ 3,
                               replicate == 3 ~ 2,
                               TRUE ~ replicate))
## Joining with `by = join_by(row)`
# Green-channel montage of the row-D wells.
im_green <- generate_full_image(
  positions_green,
  just_a11_green,
  n_rows = 1,
  background_quantile = 0.97
)

# generate_full_image() returns an inverted image, so `1 - im` undoes the
# inversion before the intensities are placed in a single colour channel.
rgb_red <- rgbImage(red = 1 - im, green = NULL, blue = NULL)
display(rgb_red)
writeImage(
  rgb_red,
  "markdown_images/12QN incucyte/red_a11.png",
  quality = 99
)

rgb_green <- rgbImage(red = NULL, green = 1 - im_green, blue = NULL)
display(rgb_green)
writeImage(
  rgb_green,
  "markdown_images/12QN incucyte/green_a11.png",
  quality = 99
)

Prime editing

Calculate the % editing

# Pile-up tables for the prime-editing run, excluding the "no_match" files.
all_files <- Sys.glob(paste0(data_dir, "Prime editing/pileup 8th nov 22/*.csv.gz"))

all_files <- all_files[!str_detect(all_files, "no_match")]

# Read every file, tagging its rows with the file name (last path component).
per_file <- lapply(all_files, function(file) {
  print(file)
  read_csv(file) %>%
    mutate(filename = word(file, -1, -1, sep = "/"))
})

# Combine the non-empty tables in one step. Building the result from the list
# means `all_df` is always (re)defined here, even when the first file is
# empty, instead of depending on which file happened to come first.
all_df <- bind_rows(Filter(function(d) nrow(d) > 0, per_file))
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Prime editing/pileup 8th nov 22/barcodes_40nt_bc_F_11_40nt_bc_R_1.csv.gz"
## Rows: 12885 Columns: 6
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): nt
## dbl (5): reference_name, position, insertion_number, n, total_reads
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Prime editing/pileup 8th nov 22/barcodes_40nt_bc_F_11_40nt_bc_R_2.csv.gz"
## Rows: 13622 Columns: 6
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): nt
## dbl (5): reference_name, position, insertion_number, n, total_reads
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Prime editing/pileup 8th nov 22/barcodes_40nt_bc_F_11_40nt_bc_R_3.csv.gz"
## Rows: 13359 Columns: 6
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): nt
## dbl (5): reference_name, position, insertion_number, n, total_reads
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Prime editing/pileup 8th nov 22/barcodes_40nt_bc_F_11_40nt_bc_R_4.csv.gz"
## Rows: 11500 Columns: 6
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): nt
## dbl (5): reference_name, position, insertion_number, n, total_reads
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Prime editing/pileup 8th nov 22/barcodes_40nt_bc_F_11_40nt_bc_R_5.csv.gz"
## Rows: 19310 Columns: 6
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): nt
## dbl (5): reference_name, position, insertion_number, n, total_reads
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Prime editing/pileup 8th nov 22/barcodes_40nt_bc_F_11_40nt_bc_R_6.csv.gz"
## Rows: 24537 Columns: 6
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): nt
## dbl (5): reference_name, position, insertion_number, n, total_reads
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Prime editing/pileup 8th nov 22/barcodes_40nt_bc_F_11_40nt_bc_R_7.csv.gz"
## Rows: 18299 Columns: 6
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): nt
## dbl (5): reference_name, position, insertion_number, n, total_reads
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Prime editing/pileup 8th nov 22/barcodes_40nt_bc_F_11_40nt_bc_R_8.csv.gz"
## Rows: 19410 Columns: 6
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): nt
## dbl (5): reference_name, position, insertion_number, n, total_reads
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Prime editing/pileup 8th nov 22/barcodes_40nt_bc_F_12_40nt_bc_R_1.csv.gz"
## Rows: 20661 Columns: 6
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): nt
## dbl (5): reference_name, position, insertion_number, n, total_reads
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Prime editing/pileup 8th nov 22/barcodes_40nt_bc_F_12_40nt_bc_R_2.csv.gz"
## Rows: 16023 Columns: 6
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): nt
## dbl (5): reference_name, position, insertion_number, n, total_reads
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Prime editing/pileup 8th nov 22/barcodes_40nt_bc_F_12_40nt_bc_R_3.csv.gz"
## Rows: 20435 Columns: 6
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): nt
## dbl (5): reference_name, position, insertion_number, n, total_reads
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Prime editing/pileup 8th nov 22/barcodes_40nt_bc_F_12_40nt_bc_R_4.csv.gz"
## Rows: 16731 Columns: 6
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): nt
## dbl (5): reference_name, position, insertion_number, n, total_reads
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Prime editing/pileup 8th nov 22/barcodes_40nt_bc_F_12_40nt_bc_R_5.csv.gz"
## Rows: 22952 Columns: 6
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): nt
## dbl (5): reference_name, position, insertion_number, n, total_reads
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Prime editing/pileup 8th nov 22/barcodes_40nt_bc_F_12_40nt_bc_R_6.csv.gz"
## Rows: 27569 Columns: 6
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): nt
## dbl (5): reference_name, position, insertion_number, n, total_reads
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Prime editing/pileup 8th nov 22/barcodes_40nt_bc_F_12_40nt_bc_R_7.csv.gz"
## Rows: 21558 Columns: 6
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): nt
## dbl (5): reference_name, position, insertion_number, n, total_reads
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Prime editing/pileup 8th nov 22/barcodes_40nt_bc_F_12_40nt_bc_R_8.csv.gz"
## Rows: 20836 Columns: 6
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): nt
## dbl (5): reference_name, position, insertion_number, n, total_reads
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Pile-up rows for reference 0 only, with the fraction of reads carrying each
# nucleotide at every (file, position, insertion number), plus the sample
# annotation decoded from the barcode numbers in the file name.
just_unc <- all_df %>%
  filter(reference_name == 0) %>%
  arrange(filename, position) %>%
  group_by(filename, position, insertion_number) %>%
  mutate(
    n_this_pos = sum(n),
    frac_this_pos = n / n_this_pos
  ) %>%
  ungroup() %>%
  mutate(
    # Forward barcode = 5th "_" field; reverse barcode = 9th field when
    # splitting on "_" or "."
    f_bc = as.numeric(word(filename, 5, 5, sep = "_")),
    r_bc = as.numeric(word(filename, 9, 9, sep = "_|\\.")),
    treatment = ifelse(f_bc == 11, "Control", "shTDP"),
    Plasmid = ifelse(r_bc <= 4, "12C", "PEMax")
  )

# G and C calls at position 561 (no insertion), with `frac_c` = the fraction
# of reads calling C in that file, or 0 when the file has no C row at all.
peg10_561 <- just_unc %>%
  filter(
    position == 561,
    nt %in% c("G", "C"),
    insertion_number == 0
  ) %>%
  group_by(filename) %>%
  mutate(
    frac_c = if (any(nt == "C")) max(frac_this_pos[nt == "C"]) else 0,
    new_name = ifelse(Plasmid == "PEMax", "Constitutive", "Cryptic")
  )

# One row per sequenced sample with its edited fraction, plus the mean and SD
# (in percent) of its Plasmid x treatment group.
#
# Rows are de-duplicated on `filename`, i.e. per sample: peg10_561 holds up
# to two rows (G and C) per file with the same `frac_c`. De-duplicating on
# the value alone would merge replicate samples that share a `frac_c`
# (e.g. several zeros) and would compute the mean/SD over nucleotide rows
# rather than over samples.
peg10_561_summary2 <- peg10_561 %>%
  ungroup() %>%
  distinct(filename, Plasmid, treatment, frac_c) %>%
  group_by(Plasmid, treatment) %>%
  mutate(mean_p = 100 * mean(frac_c),
         sd_p = 100 * sd(frac_c)) %>%
  mutate(new_name = ifelse(Plasmid == "PEMax", "+ve\nPEMax", "Cryptic\nPEMax"))

# Dot plot of the percentage of edited reads per sample, grouped by plasmid
# on the x-axis and dodged/filled by treatment.
ggplot(
  peg10_561_summary2,
  aes(x = new_name, y = 100 * frac_c, fill = treatment)
) +
  geom_dotplot(
    binaxis = "y",
    stackdir = "center",
    position = "dodge",
    binwidth = 0.5,
    dotsize = 2
  ) +
  ggpubr::theme_pubclean() +
  ggeasy::easy_add_legend_title("Treatment") +
  #geom_errorbar(position="dodge", aes(ymin = mean_p-sd_p, ymax = mean_p+sd_p)) +
  #ggtitle("Editing of UNC13A cryptic\ndonor splice site") +
  xlab("") +
  ylab("% Edited") +
  ggsci::scale_color_npg() +
  ggsci::scale_fill_npg() +
  ggeasy::easy_move_legend("right")

ggsave(
  "markdown_images/PE/dotplot_nanopore_summary.pdf",
  width = 8, height = 6.5, units = "cm"
)

TDP-43 Raver

Analyse QIAxcel of 2FL mutant

These datasets are from a capillary electrophoresis machine (“QIAxcel”).

The below R scripts use the raw data from this machine to produce the desired plots

# Raw QIAxcel trace export for the 2FL experiment (tab-separated).
raw_filename <- paste0(data_dir, "Raver/2FL qiaxcel/design2tdptests_20221015_061200_Rw.csv")

# Turn the raw traces into one long table with a normalised signal and a
# normalised time axis per lane:
#   1. one row per (Time, Row, sample) from the wide RFU(...) columns
#   2. annotate each lane with its plasmid and within-plasmid sample number
#   3. scale each lane's signal to its own maximum
#   4. find the first and last time points above a background threshold and
#      rescale Time so those two points map to 0 and 1
df <- read_tsv(raw_filename) %>%
  pivot_longer(cols = contains("RFU")) %>%
  # Characters 6-7 of the column name hold the lane number (e.g. "RFU(A01)")
  mutate(sample = as.numeric(str_sub(name,6, 7)))  %>%
  select(Time, Row, sample, value) %>%
  # Rules are evaluated in order: lanes 1-6 of row F are already matched by
  # the "Plasmid B06" rule, so the `sample < 11` rule only applies to 7-10.
  mutate(plasmid = case_when(
    Row == "A" & sample <= 6 ~ "mScarlet (-ve control)",
    Row == "B" & sample <= 6 ~ "Plasmid B02",
    Row == "C" & sample <= 6 ~ "Plasmid B03",
    Row == "D" & sample <= 6 ~ "Plasmid B04",
    Row == "E" & sample <= 6 ~ "Plasmid B05",
    Row == "F" & sample <= 6 ~ "Plasmid B06",
    Row == "A" & sample > 6 ~ "Plasmid B09",
    Row == "B" & sample > 6 ~ "Plasmid B10",
    Row == "C" & sample > 6 ~ "Plasmid B11",
    Row == "D" & sample > 6 ~ "Plasmid B07",
    Row == "E" & sample > 6 ~ "Plasmid B08",
    Row == "F" & sample < 11 ~ "Plasmid B11 -RT",
    Row == "F" & sample == 11 ~ "Water input",
    Row == "F" & sample == 12 ~ "+ve control")) %>%
  # Map lanes 1-12 onto sample numbers 1-6 (7 -> 1, ..., 12 -> 6)
  mutate(actual_sample_no = ifelse(sample %% 6 == 0, 6, sample %% 6)) %>%
  # Everything below is computed per lane (plasmid x sample number)
  group_by(plasmid, actual_sample_no) %>%
  mutate(norm_value = value/max(value)) %>%
  arrange(Time) %>%
  # Smooth with a 10-point rolling mean; padded ends become NA, then 0
  mutate(rollmean = zoo::rollmean(norm_value, k=10, na.pad=T)) %>%
  mutate(rollmean2 = ifelse(is.na(rollmean), 0, rollmean)) %>%
  # Define some backgrounds
  # (98th percentile of the lane's signal for the listed lanes, 95th otherwise)
  mutate(is_above_background = case_when(plasmid =="Plasmid B02" ~ rollmean2 > quantile(norm_value, 0.98, na.rm=T),
                                         plasmid =="mScarlet (-ve control)" ~ rollmean2 > quantile(norm_value, 0.98, na.rm=T),
                                         plasmid =="Plasmid B07" & actual_sample_no==5 ~ rollmean2 > quantile(norm_value, 0.98, na.rm=T),
                                         plasmid =="Water input" & actual_sample_no==5 ~ rollmean2 > quantile(norm_value, 0.98, na.rm=T),
                                         T ~ rollmean2 > quantile(norm_value, 0.95, na.rm=T))) %>%
  # Earliest and latest time points above background in each lane
  # NOTE(review): these are presumably the alignment-marker peaks — confirm
  mutate(lower_marker = min(ifelse(is_above_background, Time, 1000000000)),
         upper_marker = max(ifelse(is_above_background, Time, -100000))) %>%
  # Rescale time so the lower marker is 0 and the upper marker is 1, and keep
  # only the points strictly between the two
  mutate(normalised_time = round((Time-lower_marker)/(upper_marker-lower_marker),3)) %>%
  filter(normalised_time > 0 & normalised_time < 1) %>%
  #filter(!(plasmid == "Plasmid B10" & actual_sample_no == 6)) %>%
  # Even sample numbers are shTDP, odd ones NT
  mutate(treatment = ifelse(actual_sample_no %% 2 == 0, "shTDP", "NT"))
## Warning: One or more parsing issues, call `problems()` on your data frame for details,
## e.g.:
##   dat <- vroom(...)
##   problems(dat)
## Rows: 21809 Columns: 18
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr  (2): PlateId, Row
## dbl (16): Time, RFU(A01), RFU(A02), RFU(A03), RFU(A04), RFU(A05), RFU(A06), ...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# First, let's plot these all
  
# Gel-like view of every lane: one panel per plasmid, sample number across,
# normalised migration time up, shaded by normalised signal.
ggplot(
  df,
  aes(
    x = actual_sample_no,
    y = normalised_time,
    fill = norm_value
  )
) +
  geom_tile() +
  facet_wrap(~factor(plasmid)) +
  scale_fill_gradient(low = "white", high = "black") +
  ylim(0.2, 0.9) +
  theme_classic() +
  ggeasy::easy_remove_legend() +
  xlab("") +
  ylab("") +
  ggtitle(
    "10 different TDP-43/Raver1 vectors (with 2FL mutation to block autoregulation)",
    "1,3,5=NT, 2,4,6=shTDP"
  )
## Warning: Removed 26298 rows containing missing values (`geom_tile()`).

# Now, let's plot the 2FL gradient to make it pretty

# Display order of the ten constructs: the position of a plasmid in this
# list becomes its construct number (1-10).
ordered_names <- data.frame(
  plasmid = c(
    "Plasmid B03", "Plasmid B02", "Plasmid B04", "Plasmid B07", "Plasmid B08",
    "Plasmid B05", "Plasmid B06", "Plasmid B09", "Plasmid B11", "Plasmid B10"
  ),
  construct_number = factor(1:10)
)

# Keep only the lanes belonging to those ten plasmids and attach the number.
df2 <- inner_join(df, ordered_names)
## Joining with `by = join_by(plasmid)`
# Gel-like view of the first NT / shTDP pair (sample numbers 1 and 2) for
# each construct, one panel per treatment.
ggplot(
  filter(df2, actual_sample_no <= 2),
  aes(
    x = construct_number,
    y = normalised_time,
    fill = norm_value
  )
) +
  geom_tile() +
  facet_wrap(~treatment, ncol = 1) +
  scale_fill_gradient(low = "white", high = "black") +
  ylim(0.25, 0.57) +
  xlab("Construct number") +
  ylab("") +
  theme_classic() +
  ggeasy::easy_remove_legend() +
  theme(
    axis.text.y = element_blank(),
    axis.ticks.y = element_blank()
  )
## Warning: Removed 16472 rows containing missing values (`geom_tile()`).

# No plot is passed, so this saves the most recent plot (the 2FL gradient).
ggsave("markdown_images/Raver/2FL_gradient.pdf", height = 6, width = 6, units="cm")
## Warning: Removed 16472 rows containing missing values (`geom_tile()`).

Analyse RT-PCRs of UNC13A, STMN2 and AARS1

To determine whether these constructs resist cryptic splicing, we performed RT-PCRs against UNC13A, STMN2 and AARS1. We ran these on a QIAxcel.

To analyse the QIAxcel data, I wrote a small R package, QIAxcelR.

The samples are loaded in a slightly funky order. The positions are in the “QIAxcel positions.csv” file. Row A refers to the first row of each pair, and Row D refers to the second row of each pair. Column positions and first versus second row is consistent for all pairs.

Annoyingly I lost one replicate of mScarlet untreated. However, this is not a particularly important sample because all of the untreated samples tend to be very similar (mScarlet dox/shTDP is much more important, for example)

First, let’s analyse unc13a in isolation

# Python modules loaded via import()
py <- import("pybaselines")
scipy <- import("scipy")

# Raw QIAxcel export for the UNC13A RT-PCR run
unc_csv <- paste0(
  data_dir,
  "Raver/qiaxcel of unc13a stmn2 aars1/Unc13a 21st June/C220830A16_2023-06-22_2125_20230622_075330_Rw.csv"
)

# Sample sheet: one row per (row, column) position, with the sample name split
# into plasmid (first word), replicate (last word) and treatment (NT if the
# name contains "NT", otherwise shTDP).
positions <- paste0(
  data_dir,
  "Raver/qiaxcel of unc13a stmn2 aars1/Unc13a 21st June/QIAxcel positions.csv"
) %>%
  read_csv() %>%
  pivot_longer(
    cols = !contains("Row"),
    names_to = "column",
    values_to = "sample_name"
  ) %>%
  dplyr::rename(row = `Row/col`) %>%
  mutate(
    replicate = as.numeric(word(sample_name, -1)),
    plasmid = factor(
      word(sample_name, 1),
      levels = c("RV", "B5", "B11", "mScar", "-ve")
    ),
    treatment = ifelse(str_detect(sample_name, "NT"), "NT", "shTDP"),
    column = as.numeric(column)
  )
## Rows: 2 Columns: 13
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (13): Row/col, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Warning: There was 1 warning in `mutate()`.
## ℹ In argument: `replicate = as.numeric(word(sample_name, -1))`.
## Caused by warning:
## ! NAs introduced by coercion
# Read the raw QIAxcel export for the UNC13A run.
# NOTE(review): parse_qiaxcel_output() is not defined in this section —
# presumably it comes from the QIAxcelR package mentioned above; confirm.
df <- parse_qiaxcel_output(unc_csv)
## Warning: One or more parsing issues, call `problems()` on your data frame for details,
## e.g.:
##   dat <- vroom(...)
##   problems(dat)
## Rows: 7270 Columns: 18
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr  (2): PlateId, Row
## dbl (16): Time, RFU(A01), RFU(A02), RFU(A03), RFU(A04), RFU(A05), RFU(A06), ...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Pre-process the parsed UNC13A traces with the project helper.
df2 <- preprocess_dataframe(df)

# Per-lane correction handed to tweak_positions(): lane D12 gets no shift and
# a 0.97 multiplier.
# NOTE(review): presumably this rescales the lane's position axis so its bands
# line up with the other lanes -- confirm against tweak_positions().
tweak_df <- data.frame(unique_id = "D12", shift = 0, multiply = 0.97)

df3 <- tweak_positions(df2, tweak_df)

# Attach the plate layout and derive an x-axis slot for every lane:
# replicate r maps to 2r - 1 for NT and 2r for shTDP (so NT = 1,3,5 and
# shTDP = 2,4,6). The "-ve" lane is pinned to slot 6; it is drawn in its own
# facet, so it does not collide with shTDP replicate 3.
df4 <- df3 %>%
  mutate(column = as.numeric(column)) %>%
  left_join(positions, by = c("row", "column")) %>%
  mutate(
    actual_sample_no = ifelse(
      plasmid == "-ve",
      6,
      2 * replicate - 1 + as.numeric(treatment == "shTDP")
    )
  )

unc_final_data <- df4

# Pseudo-gel: one tile column per lane, darker = higher corrected signal.
# The subtitle previously read "2,4,5 = shTDP", which contradicted the slot
# numbering computed above.
p1 <- ggplot(df4,
             aes(y = index_for_plotting, fill = corrected_value, x = actual_sample_no)) +
  geom_tile() +
  theme_classic() +
  scale_fill_gradient(low = "white", high = "black") +
  ylim(0.55, 0.75) +
  facet_wrap(~plasmid) +
  ggtitle("Cryptic splicing of UNC13A", "1,3,5 = NT, 2,4,6 = shTDP") +
  xlab("Sample number") +
  ylab("") +
  ggeasy::easy_remove_legend()


# Band quantification. psi is the percentage NOT in the lower band, i.e.
# 100 * (1 - molar_fraction_lower_band). The mean and SD are computed per
# plasmid/treatment group and repeated on every row of that group; the result
# is left grouped by plasmid and treatment.
# The join key is spelled out (it is the only shared column) so that a future
# extra shared column cannot silently change the join.
yo <- find_molar_ratios(df3, lower_band_pos = 0.58, lower_band_width = 0.02, lower_band_nts = 405,
                        upper_band_pos = 0.715, upper_band_width = 0.03, upper_band_nts = 405 + 128) %>%
  left_join(positions %>% mutate(unique_id = paste0(row, column)),
            by = "unique_id") %>%
  filter(!is.na(replicate)) %>%
  ungroup() %>%
  group_by(plasmid, treatment) %>%
  mutate(psi = 100 * (1 - molar_fraction_lower_band)) %>%
  mutate(average_psi = mean(psi)) %>%
  mutate(sd_psi = sd(psi)) %>%
  mutate(good_names = factor(case_when(plasmid == "RV" ~ "Constitutive\nRaver",
                                       plasmid == "B5" ~ "Cryptic\nRaver 6",
                                       plasmid == "B11" ~ "Cryptic\nRaver 9",
                                       plasmid == "mScar" ~ "mScarlet\ncontrol"),
                             levels = c("mScarlet\ncontrol", "Constitutive\nRaver", "Cryptic\nRaver 6", "Cryptic\nRaver 9")))
## Joining with `by = join_by(unique_id)`
# Keep the UNC13A ratios under a stable name for the combined summary plot.
unc_ratios <- yo

# `yo` is still grouped by plasmid/treatment, so this keeps one row per group.
unc_group_means <- distinct(yo, average_psi, .keep_all = TRUE)

# Mean PSI per plasmid and treatment with +/- 1 SD error bars.
p2 <- ggplot(unc_group_means,
             aes(x = good_names, y = average_psi, fill = treatment)) +
  geom_col(position = "dodge", alpha = 1) +
  geom_errorbar(
    aes(ymin = average_psi - sd_psi, ymax = average_psi + sd_psi),
    position = position_dodge(width = 0.9)
  ) +
  theme_classic() +
  labs(title = "UNC13A cryptic exon PSI (n=3)", x = "", y = "Cryptic exon PSI") +
  ggeasy::easy_add_legend_title("Treatment")

# Gel on top, barplot underneath.
p1 / p2
## Warning: Removed 81167 rows containing missing values (`geom_tile()`).

Let’s also make a quick plot of representative examples

# Representative lanes: sample slots 3 and 4, i.e. replicate 2 in the NT and
# shTDP conditions respectively.
example_lanes <- unc_final_data %>%
  filter(actual_sample_no %in% c(3, 4)) %>%
  mutate(
    good_names = factor(
      case_when(plasmid == "RV" ~ "Constitutive\nRaver",
                plasmid == "B5" ~ "Cryptic\nRaver 6",
                plasmid == "B11" ~ "Cryptic\nRaver 9",
                plasmid == "mScar" ~ "mScarlet\ncontrol"),
      levels = c("mScarlet\ncontrol", "Constitutive\nRaver", "Cryptic\nRaver 6", "Cryptic\nRaver 9")
    ),
    sample_label = ifelse(actual_sample_no == 3, "NT", "shTDP")
  )

# Compact pseudo-gel, one facet per construct, with the y axis stripped.
ggplot(example_lanes,
       aes(x = sample_label, y = index_for_plotting, fill = corrected_value)) +
  geom_tile() +
  scale_fill_gradient(low = "white", high = "black") +
  ylim(0.53, 0.75) +
  facet_wrap(~good_names, ncol = 4) +
  theme_classic() +
  ggeasy::easy_remove_legend() +
  ggeasy::easy_rotate_x_labels(side = "right") +
  theme(axis.title.y = element_blank(),
        axis.text.y = element_blank(),
        axis.ticks.y = element_blank()) +
  labs(x = "", y = "") +
  ggeasy::easy_remove_y_axis()
## Warning: Removed 26878 rows containing missing values (`geom_tile()`).

# Save the plot drawn just above (ggsave defaults to the last plot), 8 x 5 cm.
ggsave("markdown_images/Raver/unc qiaxcel examples.pdf",
       width = 8, height = 5, units = "cm")
## Warning: Removed 26878 rows containing missing values (`geom_tile()`).

Now let’s analyse STMN2 in isolation

# Raw QIAxcel trace export for the run that contains the STMN2 samples.
stmn_csv <- paste0(data_dir, "Raver/qiaxcel of unc13a stmn2 aars1/4 targets 26th June/C220830A16_2023-06-27_2127_20230627_072754_Rw.csv")

# Parse the export and keep only plate rows C and D.
df <- stmn_csv %>%
  parse_qiaxcel_output() %>%
  filter(Row %in% c("C", "D"))
## Rows: 29080 Columns: 18
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (18): Time, RFU(A01), RFU(A02), RFU(A03), RFU(A04), RFU(A05), RFU(A06), ...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Drop rows without a reading and make value2 numeric before pre-processing
# (this export was parsed with every column as character).
df2 <- preprocess_dataframe(df %>%
                              filter(!is.na(value2)) %>%
                              mutate(value2 = as.numeric(value2)))

# Per-lane correction handed to tweak_positions(): four lanes get a -0.008
# shift and no rescaling.
tweak_df <- data.frame(unique_id = c("C10", "D10", "D11", "D12"), shift = -0.008, multiply = 1)

df3 <- tweak_positions(df2, tweak_df)

# The layout file labels the two rows of a pair "A" and "D"; on this plate the
# STMN2 pair sits in rows C and D, so "A" is relabelled "C".
stmn_positions <- positions %>%
  mutate(row = ifelse(row == "A", "C", "D"))

# Attach the layout and derive an x-axis slot per lane: replicate r maps to
# 2r - 1 for NT and 2r for shTDP; the "-ve" lane is pinned to slot 6 (own facet).
df4 <- df3 %>%
  mutate(column = as.numeric(column)) %>%
  left_join(stmn_positions, by = c("row", "column")) %>%
  mutate(
    actual_sample_no = ifelse(
      plasmid == "-ve",
      6,
      2 * replicate - 1 + as.numeric(treatment == "shTDP")
    )
  )

# Pseudo-gel. The subtitle previously read "2,4,5 = shTDP", which contradicted
# the slot numbering computed above (shTDP lanes are 2, 4 and 6).
p1 <- ggplot(df4,
             aes(y = index_for_plotting, fill = corrected_value, x = actual_sample_no)) +
  geom_tile() +
  theme_classic() +
  scale_fill_gradient(low = "white", high = "black") +
  facet_wrap(~plasmid) +
  ggtitle("Cryptic splicing of STMN2", "1,3,5 = NT, 2,4,6 = shTDP") +
  xlab("Sample number") +
  ylab("") +
  ggeasy::easy_remove_legend() +
  ylim(0.17, 0.27)
p1

stmn_final_data <- df4

# Band quantification: psi = 100 * (1 - molar_fraction_lower_band), with the
# mean and SD per plasmid/treatment group repeated on every row of the group.
# The result is left grouped by plasmid and treatment. The join key is spelled
# out (it is the only shared column).
yo <- find_molar_ratios(df3, lower_band_pos = 0.19, lower_band_width = 0.02, lower_band_nts = 155,
                        upper_band_pos = 0.23, upper_band_width = 0.02, upper_band_nts = 188) %>%
  left_join(stmn_positions %>% mutate(unique_id = paste0(row, column)),
            by = "unique_id") %>%
  filter(!is.na(replicate)) %>%
  ungroup() %>%
  group_by(plasmid, treatment) %>%
  mutate(psi = 100 * (1 - molar_fraction_lower_band)) %>%
  mutate(average_psi = mean(psi)) %>%
  mutate(sd_psi = sd(psi)) %>%
  mutate(good_names = factor(case_when(plasmid == "RV" ~ "Constitutive\nRaver",
                                       plasmid == "B5" ~ "Cryptic\nRaver 6",
                                       plasmid == "B11" ~ "Cryptic\nRaver 9",
                                       plasmid == "mScar" ~ "mScarlet\ncontrol"),
                             levels = c("mScarlet\ncontrol", "Constitutive\nRaver", "Cryptic\nRaver 6", "Cryptic\nRaver 9")))
## Joining with `by = join_by(unique_id)`
# Keep the STMN2 ratios under a stable name for the combined summary plot.
stmn_ratios <- yo

# `yo` is still grouped by plasmid/treatment, so this keeps one row per group.
stmn_group_means <- distinct(yo, average_psi, .keep_all = TRUE)

# Mean PSI per plasmid and treatment with +/- 1 SD error bars.
p2 <- ggplot(stmn_group_means,
             aes(x = good_names, y = average_psi, fill = treatment)) +
  geom_col(position = "dodge", alpha = 1) +
  geom_errorbar(
    aes(ymin = average_psi - sd_psi, ymax = average_psi + sd_psi),
    position = position_dodge(width = 0.9)
  ) +
  theme_classic() +
  labs(title = "STMN2 cryptic exon PSI (n=3)", x = "", y = "Cryptic exon PSI") +
  ggeasy::easy_add_legend_title("Treatment")
p2

# Gel on top, barplot underneath.
p1 / p2
## Warning: Removed 84152 rows containing missing values (`geom_tile()`).

And now AARS1

# Raw QIAxcel trace export for the AARS1 run.
aars_csv <- paste0(data_dir, "Raver/qiaxcel of unc13a stmn2 aars1/aars1 3rd july/C220830A16_2023-07-04_2131_20230704_073449_Rw.csv")

# Parse it with the project's QIAxcel parser.
df <- aars_csv %>%
  parse_qiaxcel_output()
## Warning: One or more parsing issues, call `problems()` on your data frame for details,
## e.g.:
##   dat <- vroom(...)
##   problems(dat)
## Rows: 7269 Columns: 18
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr  (2): PlateId, Row
## dbl (16): Time, RFU(A01), RFU(A02), RFU(A03), RFU(A04), RFU(A05), RFU(A06), ...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Pre-process the parsed AARS1 traces (no per-lane tweak is applied here).
df3 <- preprocess_dataframe(df)

# Attach the plate layout and derive an x-axis slot per lane: replicate r maps
# to 2r - 1 for NT and 2r for shTDP; the "-ve" lane is pinned to slot 6 and is
# drawn in its own facet.
df4 <- df3 %>%
  mutate(column = as.numeric(column)) %>%
  left_join(positions, by = c("row", "column")) %>%
  mutate(
    actual_sample_no = ifelse(
      plasmid == "-ve",
      6,
      2 * replicate - 1 + as.numeric(treatment == "shTDP")
    )
  )

aars_final_data <- df4

# Pseudo-gel. The subtitle previously read "2,4,5 = shTDP", which contradicted
# the slot numbering computed above (shTDP lanes are 2, 4 and 6).
p1 <- ggplot(df4,
             aes(y = index_for_plotting, fill = corrected_value, x = actual_sample_no)) +
  geom_tile() +
  theme_classic() +
  scale_fill_gradient(low = "white", high = "black") +
  facet_wrap(~plasmid) +
  ggtitle("Cryptic splicing of AARS1", "1,3,5 = NT, 2,4,6 = shTDP") +
  xlab("Sample number") +
  ylab("") +
  ylim(0.3, 0.7) +
  ggeasy::easy_remove_legend()
p1

# Band quantification: psi = 100 * (1 - molar_fraction_lower_band), with the
# mean and SD per plasmid/treatment group repeated on every row of the group.
# The result is left grouped by plasmid and treatment. The join key is spelled
# out (it is the only shared column).
yo <- find_molar_ratios(df3, lower_band_pos = 0.35, lower_band_width = 0.05, lower_band_nts = 245,
                        upper_band_pos = 0.5, upper_band_width = 0.06, upper_band_nts = 245 + 87) %>%
  left_join(positions %>% mutate(unique_id = paste0(row, column)),
            by = "unique_id") %>%
  filter(!is.na(replicate)) %>%
  ungroup() %>%
  group_by(plasmid, treatment) %>%
  mutate(psi = 100 * (1 - molar_fraction_lower_band)) %>%
  mutate(average_psi = mean(psi)) %>%
  mutate(sd_psi = sd(psi)) %>%
  mutate(good_names = factor(case_when(plasmid == "RV" ~ "Constitutive\nRaver",
                                       plasmid == "B5" ~ "Cryptic\nRaver 6",
                                       plasmid == "B11" ~ "Cryptic\nRaver 9",
                                       plasmid == "mScar" ~ "mScarlet\ncontrol"),
                             levels = c("mScarlet\ncontrol", "Constitutive\nRaver", "Cryptic\nRaver 6", "Cryptic\nRaver 9")))
## Joining with `by = join_by(unique_id)`
# Keep the AARS1 ratios under a stable name for the combined summary plot.
aars_ratios <- yo

# `yo` is still grouped by plasmid/treatment, so this keeps one row per group.
aars_group_means <- distinct(yo, average_psi, .keep_all = TRUE)

# Mean PSI per plasmid and treatment with +/- 1 SD error bars.
p2 <- ggplot(aars_group_means,
             aes(x = good_names, y = average_psi, fill = treatment)) +
  geom_col(position = "dodge", alpha = 1) +
  geom_errorbar(
    aes(ymin = average_psi - sd_psi, ymax = average_psi + sd_psi),
    position = position_dodge(width = 0.9)
  ) +
  theme_classic() +
  labs(title = "AARS1 cryptic exon PSI (n=3)", x = "", y = "Cryptic exon PSI") +
  ggeasy::easy_add_legend_title("Treatment")
p2

# Gel on top, barplot underneath.
p1 / p2
## Warning: Removed 73291 rows containing missing values (`geom_tile()`).

Now let’s make some nice summary plots

We’ll use smaller names so that it fits on the page

# Stack the per-target ratio tables (AARS1, UNC13A, STMN2 in that row order),
# tag each with its target, and add a short construct label for the x axis.
triple_df <- bind_rows(
  aars_ratios %>% mutate(target = "AARS1"),
  unc_ratios %>% mutate(target = "UNC13A"),
  stmn_ratios %>% mutate(target = "STMN2")
) %>%
  ungroup() %>%
  mutate(target = factor(target, levels = c("UNC13A", "STMN2", "AARS1"))) %>%
  mutate(small_name = factor(case_when(good_names == "mScarlet\ncontrol" ~ "mScarlet",
                                       good_names == "Constitutive\nRaver" ~ "Constitutive",
                                       good_names == "Cryptic\nRaver 6" ~ "Cryptic #6",
                                       good_names == "Cryptic\nRaver 9" ~ "Cryptic #9"),
                             levels = c("mScarlet", "Constitutive", "Cryptic #6", "Cryptic #9")))

# One row per target/plasmid/treatment group. The table is ungrouped here, so
# de-duplicating on the mean itself could merge two different groups that
# happen to share the same average PSI (e.g. two groups at exactly 0) and drop
# a bar; de-duplicate on the group keys instead.
triple_group_means <- triple_df %>%
  distinct(target, plasmid, treatment, .keep_all = TRUE)

# Mean PSI per construct and treatment, one facet per target, +/- 1 SD.
ggplot(triple_group_means, aes(x = small_name, y = average_psi, fill = treatment)) +
  geom_bar(stat = "identity", position = "dodge", alpha = 1) +
  ylab("Cryptic exon PSI") +
  geom_errorbar(aes(ymin = average_psi - sd_psi,
                    ymax = average_psi + sd_psi),
                position = position_dodge(width = 0.9)) +
  theme_classic() +
  ggeasy::easy_add_legend_title("Treatment") +
  xlab("") +
  facet_wrap(~target) +
  ggeasy::easy_rotate_x_labels(side = "right") +
  ggsci::scale_fill_npg() +
  ggeasy::easy_legend_at("top")

ggsave("markdown_images/Raver/unc aars and stmn barplot.pdf", height = 7, width = 9, units = "cm")

Let’s also plot the splicing of TDPREG raver itself

# Raw QIAxcel trace export for the TDP-Raver autoregulation run.
csv <- paste0(data_dir, "Raver/WT autoregulation/C220830A16_2023-07-14_2133_20230714_064133_Rw.csv")

# Parse it with the project's QIAxcel parser.
df <- csv %>%
  parse_qiaxcel_output()
## Warning: One or more parsing issues, call `problems()` on your data frame for details,
## e.g.:
##   dat <- vroom(...)
##   problems(dat)
## Rows: 7269 Columns: 18
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr  (2): PlateId, Row
## dbl (16): Time, RFU(A01), RFU(A02), RFU(A03), RFU(A04), RFU(A05), RFU(A06), ...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Pre-process the parsed autoregulation traces (no per-lane tweak here).
df3 <- preprocess_dataframe(df)

# The layout file labels the two rows of a pair "A" and "D"; on this plate the
# second row of the pair is row E, so "D" is relabelled "E".
autoreg_positions <- positions %>%
  mutate(row = ifelse(row == "D", "E", "A"))

# Attach the layout, derive an x-axis slot per lane (replicate r maps to
# 2r - 1 for NT and 2r for shTDP), and keep only the three Raver constructs.
df4 <- df3 %>%
  mutate(column = as.numeric(column)) %>%
  left_join(autoreg_positions, by = c("row", "column")) %>%
  mutate(
    actual_sample_no = ifelse(
      plasmid == "-ve",
      6,
      2 * replicate - 1 + as.numeric(treatment == "shTDP")
    )
  ) %>%
  filter(plasmid %in% c("RV", "B5", "B11"))


# Pseudo-gel. The subtitle previously read "2,4,5 = shTDP", which contradicted
# the slot numbering computed above (shTDP lanes are 2, 4 and 6).
p1 <- ggplot(df4,
             aes(y = index_for_plotting, fill = corrected_value, x = actual_sample_no)) +
  geom_tile() +
  theme_classic() +
  scale_fill_gradient(low = "white", high = "black") +
  facet_wrap(~plasmid) +
  ggtitle("Cryptic splicing of TDP-Raver", "1,3,5 = NT, 2,4,6 = shTDP") +
  xlab("Sample number") +
  ylab("") +
  ylim(0.3, 0.57) +
  ggeasy::easy_remove_legend()
p1

# Band quantification: psi = 100 * (1 - molar_fraction_lower_band), with the
# mean and SD per plasmid/treatment group repeated on every row of the group.
# The result is left grouped by plasmid and treatment. The join key is spelled
# out (it is the only shared column).
yo <- find_molar_ratios(df3, lower_band_pos = 0.35, lower_band_width = 0.05, lower_band_nts = 335 - 91,
                        upper_band_pos = 0.5, upper_band_width = 0.05, upper_band_nts = 335) %>%
  left_join(autoreg_positions %>% mutate(unique_id = paste0(row, column)),
            by = "unique_id") %>%
  filter(!is.na(replicate)) %>%
  ungroup() %>%
  group_by(plasmid, treatment) %>%
  mutate(psi = 100 * (1 - molar_fraction_lower_band)) %>%
  mutate(average_psi = mean(psi)) %>%
  mutate(sd_psi = sd(psi)) %>%
  mutate(good_names = factor(case_when(plasmid == "RV" ~ "Constitutive\nRaver",
                                       plasmid == "B5" ~ "Cryptic\nRaver 6",
                                       plasmid == "B11" ~ "Cryptic\nRaver 9",
                                       plasmid == "mScar" ~ "mScarlet\ncontrol"),
                             levels = c("mScarlet\ncontrol", "Constitutive\nRaver", "Cryptic\nRaver 6", "Cryptic\nRaver 9"))) %>%
  filter(plasmid %in% c("RV", "B5", "B11"))
## Joining with `by = join_by(unique_id)`
# `yo` is still grouped by plasmid/treatment, so this keeps one row per group.
autoreg_group_means <- distinct(yo, average_psi, .keep_all = TRUE)

# Mean PSI per construct and treatment with +/- 1 SD error bars.
p2 <- ggplot(autoreg_group_means,
             aes(x = good_names, y = average_psi, fill = treatment)) +
  geom_col(position = "dodge", alpha = 1) +
  geom_errorbar(
    aes(ymin = average_psi - sd_psi, ymax = average_psi + sd_psi),
    position = position_dodge(width = 0.9)
  ) +
  theme_classic() +
  labs(title = "TDP-Raver autoregulation", x = "", y = "Cryptic exon PSI") +
  ggeasy::easy_add_legend_title("Treatment") +
  ggsci::scale_fill_npg()

p2

# Gel on top, barplot underneath.
p1 / p2
## Warning: Removed 58374 rows containing missing values (`geom_tile()`).

# Save the combined plot drawn just above (ggsave defaults to the last plot).
ggsave("markdown_images/Raver/new autoregulation plot.pdf",
       width = 14, height = 14, units = "cm")
## Warning: Removed 58374 rows containing missing values (`geom_tile()`).

Growth competition

We pooled SKNBE2 cells carrying dox-inducible expression vectors for constitutive or TDP-REG-gated TDP/Raver (or BFP), and cultured the mixture at 0, 30 or 1000 ng/ml doxycycline (three replicates each).

# Raw QIAxcel trace export for the growth competition run.
gc_csv <- paste0(data_dir, "Raver/growth competition dream3/C220830A16_2023-07-15_2135_20230715_042136_Rw.csv")

# Parse it with the project's QIAxcel parser.
df <- gc_csv %>%
  parse_qiaxcel_output()
## Rows: 3634 Columns: 18
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr  (2): PlateId, Row
## dbl (16): Time, RFU(A01), RFU(A02), RFU(A03), RFU(A04), RFU(A05), RFU(A06), ...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Pre-process the parsed growth competition traces.
df2 <- preprocess_dataframe(df)

#tweak_df <- data.frame(unique_id = "D12", shift = 0, multiply = 0.97)

#df3 <- tweak_positions(df2, tweak_df)

# Only lanes 1-9 hold samples. `column` is converted with as.numeric() further
# down, so it may still be a string here; compare numerically so that the
# filter cannot fall back to a lexical comparison ("2" < "10" is FALSE).
df3 <- df2 %>%
  filter(as.numeric(column) < 10)

# Layout of this run: row A, columns 1-9 = dox 0 / 30 / 1000 for replicates
# 1, 2 and 3.
# NOTE(review): this overwrites the QIAxcel plate layout held in `positions`
# by the earlier sections; the name is kept so later code sees the same object.
positions <- data.frame(row = "A", column = 1:9) %>%
  mutate(dox = c(0, 30, 1000, 0, 30, 1000, 0, 30, 1000),
         replicate = c(1, 1, 1, 2, 2, 2, 3, 3, 3))

df4 <- df3 %>%
  mutate(column = as.numeric(column)) %>%
  left_join(positions, by = c("row", "column")) %>%
  mutate(actual_sample_no = column)

# Pseudo-gel of the nine lanes. The title and subtitle used to be copied from
# the UNC13A section ("Cryptic splicing of UNC13A", "1,3,5 = NT, ...") and
# described neither the samples nor the x axis of this plot.
p1 <- ggplot(df4,
             aes(y = index_for_plotting, fill = corrected_value, x = actual_sample_no)) +
  geom_tile() +
  theme_classic() +
  scale_fill_gradient(low = "white", high = "black") +
  ylim(0.45, 0.8) +
  ggtitle("TDP/Raver growth competition",
          "Samples 1-9 = dox 0, 30, 1000 ng/ml for replicates 1, 2, 3") +
  xlab("Sample number") +
  ylab("") +
  ggeasy::easy_remove_legend()
p1

# Band quantification per lane. pc_constitutive is the lower-band molar
# fraction as a percentage and pc_cryptic is the remainder; the mean and SD of
# pc_constitutive are computed per dox dose and repeated on every row.
# The result is left grouped by dox. The join key is spelled out (it is the
# only shared column).
yo <- find_molar_ratios(df3, lower_band_pos = 0.5, lower_band_width = 0.03, lower_band_nts = 335,
                        upper_band_pos = 0.75, upper_band_width = 0.03, upper_band_nts = 335 + 280) %>%
  left_join(positions %>% mutate(unique_id = paste0(row, column)),
            by = "unique_id") %>%
  ungroup() %>%
  group_by(dox) %>%
  mutate(pc_constitutive = 100 * (molar_fraction_lower_band)) %>%
  mutate(mean_constitutive = mean(pc_constitutive)) %>%
  mutate(sd = sd(pc_constitutive)) %>%
  mutate(pc_cryptic = 100 - pc_constitutive)
## Joining with `by = join_by(unique_id)`
# Mean % constitutive per dox dose with +/- 1 SD error bars.
# `yo` is grouped by dox, so distinct() keeps one row per dose.
p2 <- ggplot(yo %>% distinct(mean_constitutive, .keep_all = TRUE),
             aes(x = factor(dox), y = mean_constitutive, fill = factor(dox))) +
  geom_bar(stat = "identity", position = "dodge", alpha = 1) +
  ylab("% Constitutive") +
  geom_errorbar(aes(ymin = mean_constitutive - sd,
                    ymax = mean_constitutive + sd),
                position = position_dodge(width = 0.9)) +
  theme_classic() +
  ggtitle("TDP/Raver growth competition assay",
          "Expression of Constitutive/Cryptic raver is activated by dox") +
  ggeasy::easy_add_legend_title("Dox\nng/ml") +
  xlab("")
p2

# Long format for the stacked bars: one row per sample and band class
# (pc_constitutive / pc_cryptic).
stacked_df <- yo %>%
  dplyr::select(dox, replicate, pc_constitutive, pc_cryptic) %>%
  pivot_longer(cols = contains("pc_"))

# Stacked composition per dox dose, one facet per replicate. The x label used
# to say "ng/ul"; the dose is given in ng/ml in every other label of this
# analysis, so the unit is corrected here.
ggplot(stacked_df, aes(x = factor(dox), y = value, fill = str_replace(name, "pc_c", "C"))) +
  geom_bar(stat = "identity") +
  facet_wrap(~paste0("Replicate ", replicate)) +
  theme_classic() +
  xlab("Doxycycline ng/ml") +
  ylab("% detection") +
  ggeasy::easy_add_legend_title("Construct") +
  ggtitle("Growth competition of dox-activated TDP-43-Raver") +
  ggsci::scale_fill_npg()

ggsave("markdown_images/Raver/growth competition plot.pdf", height = 7, width = 14, units = "cm")

And now let’s analyse the Nanopore data, as this is obviously more informative.

# Aligned Nanopore reads, one BAM per sample.
bams <- Sys.glob(paste0(data_dir, "Raver/growth competition dream3/aligned/*.bam"))

# Count reads per reference in every BAM. Results are collected in a
# pre-allocated list and bound once at the end, so `all_df` is always defined
# (an empty table when no BAM is found) and is not re-copied on each pass.
bam_counts <- vector("list", length(bams))

for (i in seq_along(bams)) {
  bam <- bams[i]

  # Sample label = the text between "barcodes_" and ".bam" in the path.
  file <- word(word(bam, 2, sep = "barcodes_"), 1, sep = "\\.bam")

  # flag < 256 drops secondary (0x100) and supplementary (0x800) alignments.
  # Selecting `rname` explicitly avoids the "Adding missing grouping
  # variables" message; the result is still grouped by rname, as before.
  df <- data.frame(scanBam(bam)) %>%
    filter(flag < 256) %>%
    group_by(rname) %>%
    mutate(n = n()) %>%
    select(rname, n) %>%
    unique() %>%
    mutate(combo = file)

  bam_counts[[i]] <- df
}

all_df <- bind_rows(bam_counts)
## Adding missing grouping variables: `rname`
## Adding missing grouping variables: `rname`
## Adding missing grouping variables: `rname`
## Adding missing grouping variables: `rname`
## Adding missing grouping variables: `rname`
## Adding missing grouping variables: `rname`
## Adding missing grouping variables: `rname`
## Adding missing grouping variables: `rname`
## Adding missing grouping variables: `rname`
# Annotate each sample with replicate and dox dose, and express every
# reference as a percentage of the reads in its sample.
# `combo` is the label parsed from the file name, i.e. a string. It is
# converted to a number before comparing, because "combo < 4" on a string is
# a lexical comparison that only happens to work for single-digit labels.
# Samples 1-3 / 4-6 / 7-9 are replicates 1 / 2 / 3, each at 0, 30, 1000 ng/ml.
all_df2 <- all_df %>%
  filter(!is.na(rname)) %>%
  mutate(replicate = case_when(as.numeric(combo) < 4 ~ 1,
                               as.numeric(combo) < 7 ~ 2,
                               TRUE ~ 3)) %>%
  mutate(dox = factor(case_when(as.numeric(combo) %in% c(1, 4, 7) ~ 0,
                                as.numeric(combo) %in% c(2, 5, 8) ~ 30,
                                as.numeric(combo) %in% c(3, 6, 9) ~ 1000),
                      levels = c(0, 30, 1000))) %>%
  ungroup() %>%
  group_by(replicate, dox) %>%
  mutate(pc = 100 * n / sum(n)) %>%
  mutate(good_replicate = paste0("Replicate ", replicate),
         good_rname = factor(case_when(rname == "Dream1_rv_constitutive" ~ "Constitutive",
                                       rname == "Dream1_rv_b5" ~ "Cryptic 6",
                                       rname == "Dream1_rv_b11" ~ "Cryptic 9",
                                       rname == "BFP" ~ "BFP"),
                             levels = c("Constitutive", "Cryptic 6",
                                        "Cryptic 9", "BFP")))

# Change relative to the un-induced (dox 0) sample of the same replicate and
# construct. If a group has no dox 0 row the baseline is NA; previously a -1
# sentinel leaked through and produced a plausible-looking but wrong change.
all_df3 <- all_df2 %>%
  ungroup() %>%
  group_by(good_replicate, good_rname) %>%
  mutate(dox0_pc = if (any(dox == 0, na.rm = TRUE)) {
    max(pc[which(dox == 0)])
  } else {
    NA_real_
  }) %>%
  mutate(change_pc = pc - dox0_pc)


# Share of each construct across the dox series, one facet per replicate.
p1 <- ggplot(all_df2 %>% ungroup(), aes(x = dox, y = pc, colour = good_rname)) +
  geom_line(aes(group = good_rname)) +
  geom_point() +
  facet_wrap(~good_replicate) +
  theme_classic() +
  ylim(0, NA) +
  ggeasy::easy_remove_legend() +
  ylab("% detected") +
  xlab("Doxycycline (ng/ml)") +
  ggsci::scale_colour_npg()

# Change in share versus 0 ng/ml for the two induced doses.
p2 <- ggplot(all_df3 %>% filter(dox != 0), aes(x = dox, y = change_pc, fill = good_rname)) +
  geom_bar(stat = "identity", position = "dodge", width = 0.7) +
  facet_wrap(~good_replicate) +
  theme_classic() +
  ggeasy::easy_add_legend_title("Construct") +
  ylab("Change in %\nversus 0 ng/ml") +
  xlab("Doxycycline (ng/ml)") +
  ggsci::scale_fill_npg()

p2

p1 | p2

ggsave("markdown_images/Raver/growth competition summary nanopore.pdf",
       height = 6, width = 19, units = "cm")

Specificity of AARS1

First let’s look at cell lines

# Per-sample AARS1 cryptic exon PSI in cell lines. Experiments whose name
# contains "FAC" are dropped, PSI is rescaled to a percentage, and the
# condition is relabelled ("control" -> "Control", anything else ->
# "TDP-43 -ve"). The mean and SD per experiment/condition are repeated on every
# row. Finally, hyphens in the experiment name become line breaks for the
# axis labels, except directly after "SH", where the hyphen is restored.
# The result stays grouped by experiment and condition.
df <- "small_data_files/aars1 specificity/cell_line_counts_psi.csv.gz" %>%
  read_csv() %>%
  filter(!str_detect(experiment, "FAC")) %>%
  mutate(
    ce_psi = 100 * ce_psi,
    condition = ifelse(condition == "control", "Control", "TDP-43 -ve")
  ) %>%
  group_by(experiment, condition) %>%
  mutate(average_ce = mean(ce_psi),
         sd_ce = sd(ce_psi)) %>%
  mutate(
    experiment = str_replace_all(
      str_replace_all(experiment, "-", "\n"),
      "SH\n", "SH-"
    )
  )
## Rows: 62 Columns: 7
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (3): sample_name, experiment, condition
## dbl (4): annotated, novel_acceptor, novel_donor, ce_psi
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# One row per experiment/condition with the group mean and SD.
# NOTE(review): the variable name `summary` is the same as base::summary();
# it is kept so that any later code referring to it keeps working.
summary <- df %>%
  distinct(experiment, condition, average_ce, sd_ce)

# A single dodge shared by bars, error bars and points so that all three are
# centred on the same x positions. Previously the bars used the default 0.9
# while error bars and points used 0.8, leaving them slightly off-centre.
cell_line_dodge <- position_dodge(width = 0.9)

# Mean AARS1 CE inclusion per experiment and condition, with +/- 1 SD error
# bars and the individual samples overlaid.
# The point colour and alpha are constants, so they are set outside aes().
# Inside aes() the alpha was treated as data: it went through an alpha scale
# (so the points were not drawn at 0.1) and added a spurious "0.1" legend.
# NOTE(review): the points are now drawn at the literal alpha of 0.1, which is
# fainter than the earlier rendering -- raise it if they become hard to see.
cell_line_plot <- ggplot(summary, aes(x = experiment, y = average_ce, fill = condition)) +
  geom_bar(position = cell_line_dodge, stat = "identity") +
  theme_classic() +
  geom_errorbar(aes(ymin = average_ce - sd_ce, ymax = average_ce + sd_ce),
                width = 0.2, position = cell_line_dodge) +
  geom_point(data = df, aes(y = ce_psi),
             colour = "grey50", alpha = 0.1,
             position = cell_line_dodge) +
  scale_fill_npg() +
  ylab("% AARS1 CE inclusion") +
  xlab("") +
  ggeasy::easy_remove_legend_title() +
  ggtitle("AARS1 CE detection in cell lines")

Now let’s look at patient samples.

# AARS1 cryptic exon PSI in the NYGC samples. The table has one row per
# sample and junction type; it is widened to one row per sample, and the
# sample PSI is the mean of the novel acceptor and novel donor junction PSIs
# (as percentages). Only tissues in which exactly two distinct tdp_path values
# occur are kept, and ALS-TDP / FTD-TDP cases are flagged.
# The result stays grouped by tissue_clean.
df <- "small_data_files/aars1 specificity/aars1_ce_in_nygc.csv.gz" %>%
  read_csv() %>%
  dplyr::select(individual, sample, psi, tissue_clean, disease, type, tdp_path, disease_full) %>%
  mutate(psi = 100 * psi) %>%
  pivot_wider(names_from = type, values_from = "psi") %>%
  mutate(psi = (novel_acceptor + novel_donor) / 2) %>%
  group_by(tissue_clean) %>%
  mutate(can_be_path = n_distinct(tdp_path) == 2) %>%
  filter(can_be_path) %>%
  mutate(
    tdp_disease = ifelse(disease %in% c("ALS-TDP", "FTD-TDP"), "Yes", "No")
  )
## Rows: 3364 Columns: 19
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (14): paste_into_igv_junction, sample, individual, region, tissue, tissu...
## dbl  (4): psi, age, onset, n_vitro
## lgl  (1): disease_tissue
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# PSI distribution per disease group, one facet per tissue, coloured by
# whether the disease is ALS-TDP / FTD-TDP.
nygc_plot <- ggplot(df, aes(x = disease, y = psi, fill = tdp_disease)) +
  geom_boxplot(outlier.alpha = 0.2) +
  facet_wrap(~tissue_clean) +
  scale_fill_npg() +
  ggpubr::theme_classic2() +
  ggeasy::easy_rotate_x_labels(side = "right") +
  ggeasy::easy_add_legend_title("TDPopathy?") +
  labs(
    title = "NYGC RNA-seq data",
    subtitle = "Filtered for CNS regions where TDP-43 pathology has been reported",
    x = "Disease",
    y = "% AARS1 CE inclusion"
  )

# Cell-line panel on top, patient panel underneath.
cell_line_plot / nygc_plot

ggsave("markdown_images/AARS1 specificity/combined_plot.pdf",
       width = 17, height = 20, units = "cm")

New triple cryptic Cre analysis

# Junction count tables, one per barcode pair.
files <- Sys.glob(paste0(data_dir, "/Nanopore of triple cryptic cre/new_triple_cryptic_cre/junction_counts/*.csv.gz"))

# ignore positive control as doesn't have CEs
# The pattern is matched against the file name only, so a "-1" or "r1"
# elsewhere in the directory path cannot exclude every file.
files <- files[!str_detect(basename(files), "-1|r1")]

# Read and filter every file. Results go into a pre-allocated list that is
# bound once at the end. Previously `cryptic_df` was only created when the
# first file was processed, so an empty (skipped) first file left it undefined
# for every later bind_rows() call.
cryptic_list <- vector("list", length(files))

for (i in seq_along(files)) {
  file <- files[i]
  print(file)

  # The file name (without extension) is "<bc1>_<bc2>".
  sample_id <- word(word(file, sep = "/", -1, -1), sep = "\\.", 1, 1)
  bc1 <- word(sample_id, sep = "_", 1, 1)
  bc2 <- word(sample_id, sep = "_", 2, 2)

  csv <- read_csv(file)

  # Nothing to keep from an empty table; its list slot stays NULL.
  if (nrow(csv) == 0) {
    next
  }

  # A missing flag string is treated as "no flags" so str_detect() below
  # returns FALSE rather than NA for those rows.
  csv$flag_string[is.na(csv$flag_string)] <- ""

  # Keep primary alignments to the reporter that contain the 2381-2475
  # junction, and flag which of the three junction pairs each one carries.
  csv <- csv %>%
    filter(reference == "triple_cryptic_cre") %>%
    filter(!str_detect(flag_string, "not primary|supplementary")) %>%
    filter(!is.na(junctions)) %>%
    filter(str_detect(junctions, "2381-2475")) %>%
    mutate(first_cryptic = str_detect(junctions, "235-399;492-721"),
           second_cryptic = str_detect(junctions, "820-984;1083-1312"),
           third_cryptic = str_detect(junctions, "1589-1753;1862-2091")) %>%
    mutate(bc1 = bc1,
           bc2 = bc2)

  cryptic_list[[i]] <- csv
}

# bind_rows() skips the NULL slots left by empty files.
cryptic_df <- bind_rows(cryptic_list)
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data//Nanopore of triple cryptic cre/new_triple_cryptic_cre/junction_counts/f1_r2.csv.gz"
## Rows: 13 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): reference, flag_string, strand, junctions
## dbl (5): mapping_quality, flag, first_pos, last_pos, number_of_reads
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data//Nanopore of triple cryptic cre/new_triple_cryptic_cre/junction_counts/f1_r3.csv.gz"
## Rows: 91 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): reference, flag_string, strand, junctions
## dbl (5): mapping_quality, flag, first_pos, last_pos, number_of_reads
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data//Nanopore of triple cryptic cre/new_triple_cryptic_cre/junction_counts/f1_r4.csv.gz"
## Rows: 5 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): reference, flag_string, strand, junctions
## dbl (5): mapping_quality, flag, first_pos, last_pos, number_of_reads
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data//Nanopore of triple cryptic cre/new_triple_cryptic_cre/junction_counts/f2_r2.csv.gz"
## Rows: 488 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): reference, flag_string, strand, junctions
## dbl (5): mapping_quality, flag, first_pos, last_pos, number_of_reads
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data//Nanopore of triple cryptic cre/new_triple_cryptic_cre/junction_counts/f2_r3.csv.gz"
## Rows: 376 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): reference, flag_string, strand, junctions
## dbl (5): mapping_quality, flag, first_pos, last_pos, number_of_reads
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data//Nanopore of triple cryptic cre/new_triple_cryptic_cre/junction_counts/f2_r4.csv.gz"
## Rows: 115 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): reference, flag_string, strand, junctions
## dbl (5): mapping_quality, flag, first_pos, last_pos, number_of_reads
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data//Nanopore of triple cryptic cre/new_triple_cryptic_cre/junction_counts/f3_r2.csv.gz"
## Rows: 105 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): reference, flag_string, strand, junctions
## dbl (5): mapping_quality, flag, first_pos, last_pos, number_of_reads
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data//Nanopore of triple cryptic cre/new_triple_cryptic_cre/junction_counts/f3_r3.csv.gz"
## Rows: 192 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): reference, flag_string, strand, junctions
## dbl (5): mapping_quality, flag, first_pos, last_pos, number_of_reads
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data//Nanopore of triple cryptic cre/new_triple_cryptic_cre/junction_counts/f3_r4.csv.gz"
## Rows: 71 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): reference, flag_string, strand, junctions
## dbl (5): mapping_quality, flag, first_pos, last_pos, number_of_reads
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data//Nanopore of triple cryptic cre/new_triple_cryptic_cre/junction_counts/f4_r2.csv.gz"
## Rows: 750 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): reference, flag_string, strand, junctions
## dbl (5): mapping_quality, flag, first_pos, last_pos, number_of_reads
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data//Nanopore of triple cryptic cre/new_triple_cryptic_cre/junction_counts/f4_r3.csv.gz"
## Rows: 541 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): reference, flag_string, strand, junctions
## dbl (5): mapping_quality, flag, first_pos, last_pos, number_of_reads
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data//Nanopore of triple cryptic cre/new_triple_cryptic_cre/junction_counts/f4_r4.csv.gz"
## Rows: 105 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): reference, flag_string, strand, junctions
## dbl (5): mapping_quality, flag, first_pos, last_pos, number_of_reads
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data//Nanopore of triple cryptic cre/new_triple_cryptic_cre/junction_counts/f5_r2.csv.gz"
## Rows: 280 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): reference, flag_string, strand, junctions
## dbl (5): mapping_quality, flag, first_pos, last_pos, number_of_reads
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data//Nanopore of triple cryptic cre/new_triple_cryptic_cre/junction_counts/f5_r3.csv.gz"
## Rows: 253 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): reference, flag_string, strand, junctions
## dbl (5): mapping_quality, flag, first_pos, last_pos, number_of_reads
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data//Nanopore of triple cryptic cre/new_triple_cryptic_cre/junction_counts/f5_r4.csv.gz"
## Rows: 136 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): reference, flag_string, strand, junctions
## dbl (5): mapping_quality, flag, first_pos, last_pos, number_of_reads
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data//Nanopore of triple cryptic cre/new_triple_cryptic_cre/junction_counts/f6_r2.csv.gz"
## Rows: 461 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): reference, flag_string, strand, junctions
## dbl (5): mapping_quality, flag, first_pos, last_pos, number_of_reads
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data//Nanopore of triple cryptic cre/new_triple_cryptic_cre/junction_counts/f6_r3.csv.gz"
## Rows: 811 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): reference, flag_string, strand, junctions
## dbl (5): mapping_quality, flag, first_pos, last_pos, number_of_reads
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data//Nanopore of triple cryptic cre/new_triple_cryptic_cre/junction_counts/f6_r4.csv.gz"
## Rows: 225 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): reference, flag_string, strand, junctions
## dbl (5): mapping_quality, flag, first_pos, last_pos, number_of_reads
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data//Nanopore of triple cryptic cre/new_triple_cryptic_cre/junction_counts/f7_r2.csv.gz"
## Rows: 53 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): reference, flag_string, strand, junctions
## dbl (5): mapping_quality, flag, first_pos, last_pos, number_of_reads
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data//Nanopore of triple cryptic cre/new_triple_cryptic_cre/junction_counts/f7_r3.csv.gz"
## Rows: 37 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): reference, flag_string, strand, junctions
## dbl (5): mapping_quality, flag, first_pos, last_pos, number_of_reads
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data//Nanopore of triple cryptic cre/new_triple_cryptic_cre/junction_counts/f7_r4.csv.gz"
## Rows: 146 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): reference, flag_string, strand, junctions
## dbl (5): mapping_quality, flag, first_pos, last_pos, number_of_reads
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data//Nanopore of triple cryptic cre/new_triple_cryptic_cre/junction_counts/f8_r2.csv.gz"
## Rows: 389 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): reference, flag_string, strand, junctions
## dbl (5): mapping_quality, flag, first_pos, last_pos, number_of_reads
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data//Nanopore of triple cryptic cre/new_triple_cryptic_cre/junction_counts/f8_r3.csv.gz"
## Rows: 359 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): reference, flag_string, strand, junctions
## dbl (5): mapping_quality, flag, first_pos, last_pos, number_of_reads
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data//Nanopore of triple cryptic cre/new_triple_cryptic_cre/junction_counts/f8_r4.csv.gz"
## Rows: 109 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): reference, flag_string, strand, junctions
## dbl (5): mapping_quality, flag, first_pos, last_pos, number_of_reads
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Annotate every junction-count row with:
#   ce_string - the three cryptic flags, coerced to numeric and joined by "_"
#   treatment - "shTDP" for forward barcodes f2/f4/f6/f8, otherwise "NT"
#   n_CEs     - 0-3 according to the reverse barcode (r1-r4); any other
#               reverse barcode yields NA
df <- cryptic_df %>%
  mutate(ce_string = paste(as.numeric(first_cryptic), as.numeric(second_cryptic),
                           as.numeric(third_cryptic), sep = "_")) %>%
  # TRUE rather than T for the fallback: T is an ordinary, reassignable variable
  mutate(treatment = case_when(bc1 %in% c("f2", "f4", "f6", "f8") ~ "shTDP",
                               TRUE ~ "NT")) %>%
  mutate(n_CEs = case_when(bc2 == "r1" ~ 0,
                           bc2 == "r2" ~ 1,
                           bc2 == "r3" ~ 2,
                           bc2 == "r4" ~ 3))

# Collapse to one row per barcode pair (bc1, bc2) holding the percentage of
# reads whose ce_string is "1_1_1"; pairs with 50 reads or fewer are dropped.
df2 <- df %>%
  # Reads per CE pattern within each barcode pair
  group_by(ce_string, bc1, bc2) %>%
  mutate(n = sum(number_of_reads)) %>%
  ungroup() %>%
  distinct(bc1, bc2, ce_string, treatment, n_CEs, n) %>%
  # Per barcode pair: reads with the "1_1_1" pattern, all reads, and the ratio
  group_by(bc1, bc2) %>%
  mutate(
    n_productive = sum((ce_string == '1_1_1') * n),
    n_total = sum(n),
    pc_productive = 100 * n_productive / n_total
  ) %>%
  distinct(n_CEs, treatment, pc_productive, n_productive, n_total) %>%
  filter(n_total > 50)
  
# Dot plot of % productive transcripts against the number of CEs in the
# construct: one dot per row of df2, filled by treatment.
ggplot(df2, aes(x = factor(n_CEs), y = pc_productive, fill = treatment)) +
  geom_dotplot(binaxis = "y", stackdir = "center", binwidth = 0.7) +
  theme_minimal() +
  ylab("% productive transcripts") +
  xlab("Number of CEs in construct") +
  ggeasy::easy_add_legend_title("Treatment") +
  # treatment is mapped to fill, so the NPG palette has to be a fill scale;
  # a colour scale is silently ignored because no colour aesthetic is mapped
  ggsci::scale_fill_npg()

# Saves the plot displayed above (ggsave defaults to the last plot)
ggsave("markdown_images/Cre/new_cre_plot.pdf", height = 6, width = 9, units = "cm")

UNC13A at the synapse

Creating a global p-value is challenging, as we cannot assume that the distribution is normal, and N=3 is unsuitable for a non-parametric test.

We CAN calculate an accurate p-value within each replicate, showing that B5 and B11 are raised relative to mScarlet.

For summaries across replicates, we can assume normality of the log ratios of the per-replicate means and use a ratio t-test, then correct for multiple testing (n=2 tests).

# Read in the quantifications from Pete

unc13a_df <- paste0(data_dir, '/synapse_data/unc13a intensity.csv') %>%
  read_csv() %>%
  pivot_longer(cols = c('Mscarlet', 'B5', 'B11')) %>%
  filter(!is.na(value)) %>%
  # Mean value per replicate x construct, repeated on every row of the group
  group_by(replicate, name) %>%
  mutate(average = mean(value)) %>%
  # Correct the capitalisation of the control label and fix the factor order
  mutate(name = factor(str_replace(name, 'Mscarlet', 'mScarlet'),
                       levels = c('mScarlet', 'B5', 'B11'))) %>%
  # Replicate labels become the number held in their second character
  mutate(replicate = as.numeric(str_sub(replicate, 2, 2)))
## Rows: 76 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): replicate
## dbl (3): Mscarlet, B5, B11
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# One row per construct x replicate, holding the per-replicate mean (`average`)
unc13a_summary_df <- unc13a_df %>% distinct(name, replicate, average) %>% ungroup()

# Wide layout: one row per replicate, one column of means per construct
unc13a_wide_summary <- unc13a_summary_df %>%
  pivot_wider(names_from = 'name', values_from = 'average')

# One-sided, one-sample t-test on the log ratio of B5 to mScarlet replicate
# means (H1: mean log ratio > 0). The p-value printed here is unadjusted.
print(t.test(log(unc13a_wide_summary$B5/unc13a_wide_summary$mScarlet), alternative = 'greater'))
## 
##  One Sample t-test
## 
## data:  log(unc13a_wide_summary$B5/unc13a_wide_summary$mScarlet)
## t = 4.8952, df = 2, p-value = 0.01964
## alternative hypothesis: true mean is greater than 0
## 95 percent confidence interval:
##  0.1990963       Inf
## sample estimates:
## mean of x 
## 0.4934265
# Same one-sided log-ratio t-test for B11 against mScarlet (unadjusted p-value)
print(t.test(log(unc13a_wide_summary$B11/unc13a_wide_summary$mScarlet), alternative = 'greater'))
## 
##  One Sample t-test
## 
## data:  log(unc13a_wide_summary$B11/unc13a_wide_summary$mScarlet)
## t = 8.22, df = 2, p-value = 0.00724
## alternative hypothesis: true mean is greater than 0
## 95 percent confidence interval:
##  0.3227699       Inf
## sample estimates:
## mean of x 
## 0.5005952
# Plot all values across replicates: one dot per measurement, one facet per
# replicate, constructs along the x axis
p1 <- ggplot(unc13a_df, aes(x = name, y = value, fill = name)) +
  geom_dotplot(binaxis = "y", binwidth = 4, stackdir = "center") +
  facet_wrap(~paste0("Replicate ", replicate)) +
  ylim(0, NA) +
  theme_classic() +
  ggsci::scale_fill_npg() +
  ggeasy::easy_remove_legend() +
  # Axis label spelling corrected ("fluorescence")
  ylab("UNC13A fluorescence") +
  xlab("Construct")
p1

# Plot summaries: grey bars show the mean of the per-replicate averages for
# each construct; points show the individual replicate averages
p2 <- ggplot(unc13a_summary_df, aes(x = name, y = average,
                                    colour = factor(replicate))) +
  # summarise() gives one mean per construct directly. The constant
  # fill/colour are set as parameters, so the bars do not use the
  # replicate colour mapping
  geom_bar(data = unc13a_summary_df %>%
             group_by(name) %>%
             summarise(m = mean(average)),
           aes(x = name, y = m),
           fill = "grey80", colour = "grey60", stat = "identity") +
  # Horizontal jitter only, so the plotted y values stay exact
  geom_point(position = position_jitter(width = 0.1, height = 0)) +
  ylim(0, NA) +
  theme_classic() +
  ggeasy::easy_add_legend_title("Replicate") +
  # Axis label spelling corrected ("fluorescence")
  ylab("UNC13A fluorescence") +
  xlab("Construct")
p2

# Pairwise Dunn's tests (the multiple-comparison follow-up to Kruskal-Wallis)
# between constructs, run separately for each replicate

# Run Dunn's test of `value` across the levels of `name` for one replicate.
#
#   replicate_number  value of `replicate` in the global `unc13a_df` to analyse
#
# Returns the `$res` comparison table produced by dunnTest(), with an added
# `replicate` column identifying which replicate the rows came from.
#
# The omnibus kruskal.test() result that used to be computed here was never
# used or returned, so that call has been removed.
analyse_replicate <- function(replicate_number) {
  this_df <- unc13a_df %>% filter(replicate == replicate_number)
  dunnTest(value ~ name, data = this_df)$res %>%
    mutate(replicate = replicate_number)
}

# Run the per-replicate analysis for replicates 1-3, stack the result tables,
# and apply a single BH adjustment to the unadjusted p-values of all rows
combined_df <- lapply(1:3, analyse_replicate) %>%
  bind_rows() %>%
  mutate(full_padj = p.adjust(P.unadj, method = 'BH'))  # arguably this is too conservative, but all relevant are significant anyway

# Re-display the per-replicate dot plot so that it is the plot ggsave() writes
p1

ggsave(
  "markdown_images/synapse/unc13a_quant_split_by_replicate.pdf",
  height = 7, width = 14, units = "cm"
)
ggsave(
  "markdown_images/synapse/unc13a_quant_split_by_replicate.png",
  height = 7, width = 14, units = "cm"
)

Double-check that the paired log-ratio t-test implementation is valid

# Simulate one experiment and return the p-value of a one-sided, one-sample
# t-test (alternative "greater") on log(y / x).
#
#   n         number of x/y values drawn
#   multiply  factor applied to both the mean and the sd of y relative to x
#   sd, mean  standard deviation and mean of the normal distribution for x
#
# x is drawn before y, so results are reproducible for a given seed.
make_paired_data <- function(n = 3, multiply = 1, sd = 1, mean = 10){
  baseline <- rnorm(n, mean, sd)
  scaled <- rnorm(n, mean * multiply, sd * multiply)
  log_ratio <- log(scaled / baseline)
  t.test(log_ratio, alternative = "greater")$p.value
}

# Number of simulated experiments per scenario
rep <- 100000

# p-values when y has the same mean as x (multiply = 1) and when y is
# 10% larger (multiply = 1.1)
null_true <- replicate(rep, make_paired_data(multiply = 1))
null_false <- replicate(rep, make_paired_data(multiply = 1.1))

# Fraction of simulations with p < 0.05 in each scenario
print(sum(null_true < 0.05) / rep)
## [1] 0.04968
print(sum(null_false < 0.05) / rep)
## [1] 0.20292
# As expected, values of 0.05 and then >0.05

For reviewers' eyes only - Synapse measurements

# Load the synapse frequency table, reshape it to long form (one row per
# measurement) and swap the construct names for the short labels used on the
# plot. Any name missing from the lookup would become NA.
freq_df <- paste0(data_dir, '/synapse_data/synapse frequency data.csv') %>%
  read_csv() %>%
  pivot_longer(cols = c('mScarlet', 'B5', 'B11')) %>%
  filter(!is.na(value)) %>%
  mutate(name = unname(c(mScarlet = 'mSc', B5 = '#6', B11 = '#9')[name]))
## Rows: 28 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): replicate
## dbl (3): mScarlet, B5, B11
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Dot plot of the frequency measurements, one dot per value, by construct
freq_df %>%
  ggplot(aes(x = name, y = value, fill = name)) +
  geom_dotplot(binaxis = 'y', stackdir = 'center', binwidth = 0.15) +
  theme_classic() +
  labs(
    x = 'Construct',
    y = 'Frequency / Hz',
    title = 'Patch clamping of piggyBac i3 Neurons'
  ) +
  ggeasy::easy_remove_legend()

# Writes the plot displayed above
ggsave(
  "~/Downloads/frequency_data.png",
  height = 10, width = 10, units = 'cm'
)

Run on loads of sequences

# Read in a file with all the predictions for these proteins
full_df <- paste0(data_dir, '/running_on_many_sequences/processed.csv.gz') %>%
  read_csv()
## Rows: 248769 Columns: 6
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (3): type, filename, protein
## dbl (3): rel_pos, value, score
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# And read in the commands that were used for SpliceNouveau
commands_ecoli <- paste0(
  data_dir,
  '/running_on_many_sequences/uniprotkb_proteome_UP000000625_AND_revi_2024_01_18_commands.fixed.csv.gz'
) %>%
  read_csv()
## Rows: 1891 Columns: 2
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): command, protein_name
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Distinct protein names that appear in the E. coli command file
ecoli_proteins <- commands_ecoli %>%
  pull(protein_name) %>%
  unique()

# The equivalent command file for the human proteome
commands_human <- paste0(
  data_dir,
  '/running_on_many_sequences/uniprotkb_proteome_UP000005640_AND_revi_2024_01_18_commands.fixed.csv.gz'
) %>%
  read_csv()
## Rows: 1977 Columns: 2
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): command, protein_name
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Distinct protein names that appear in the human command file
human_proteins <- commands_human %>%
  pull(protein_name) %>%
  unique()

# Restrict to positions within 50 nt of a splice site, then for each protein
# keep only the rows carrying that protein's highest score, provided that
# score exceeds 1.8. The result stays grouped by protein.
full_df2 <- full_df %>%
  filter(abs(rel_pos) < 50) %>%
  group_by(protein) %>%
  filter(score == max(score), score > 1.8) %>%
  mutate(
    type = factor(
      type,
      levels = c(
        'Constitutive donor',
        'Cryptic acceptor',
        'Cryptic donor',
        'Constitutive acceptor'
      )
    )
  )

# How many proteins survive the filters
n_distinct(full_df2$protein)
## [1] 197
# Heatmap for the E. coli proteins: x = position relative to the splice site,
# one row per distinct score value, one facet column per splice-site type,
# tiles filled by `value`
p1 <- full_df2 %>%
  filter(protein %in% ecoli_proteins) %>%
  ggplot(aes(x = rel_pos, y = factor(score), fill = value)) +
  geom_tile() +
  facet_grid(cols = vars(type), rows = NULL) +
  scale_fill_viridis_c() +
  #scale_fill_continuous(low = 'white', high = 'black') +
  theme_classic() +
  ggeasy::easy_add_legend_title('SpliceAI\nScore') +
  ggeasy::easy_remove_y_axis() +
  labs(
    title = 'First 100 E. coli proteins\n(3 failed due to short length)',
    x = 'Position relative to splice site (nucleotides)'
  )

# Same heatmap for the human proteins; rows here are keyed on the second
# "_"-separated field of the protein name rather than on the score
p2 <- full_df2 %>%
  filter(protein %in% human_proteins) %>%
  ggplot(aes(x = rel_pos, y = word(protein, 2, sep="_"), fill = value)) +
  geom_tile() +
  facet_grid(cols = vars(type), rows = NULL) +
  scale_fill_viridis_c() +
  #scale_fill_continuous(low = 'white', high = 'black') +
  theme_classic() +
  ggeasy::easy_add_legend_title('SpliceAI\nScore') +
  ggeasy::easy_remove_y_axis() +
  labs(
    title = 'First 100 Human proteins',
    x = 'Position relative to splice site (nucleotides)'
  )

# Stack the two panels vertically
p1/p2

ggsave(
  '~/Downloads/loads_seqs.png',
  height = 13, width = 17, units = 'cm'
)

Demonstrating that more optimised vectors are spliced better

# Minimum total reads a reference needs within a condition to be analysed
min_reads <- 1
x <- 0.01

# Read in junction counts
nt <- paste0(data_dir, "/SpliceNouveau_optimisation/14h.nt.gz") %>%
  read_csv() %>%
  mutate(condition = 'NT')
## Rows: 14884 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): reference, flag_string, strand, junctions
## dbl (5): mapping_quality, flag, first_pos, last_pos, number_of_reads
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Junction counts for the dox sample, labelled as the shTDP condition
dox <- paste0(data_dir, "/SpliceNouveau_optimisation/14h.dox.gz") %>%
  read_csv() %>%
  mutate(condition = 'shTDP')
## Rows: 14631 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): reference, flag_string, strand, junctions
## dbl (5): mapping_quality, flag, first_pos, last_pos, number_of_reads
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Combine both conditions and compute, per reference x condition, the fraction
# of reads that are productive and the fraction that use the expected donor.
# The result is left grouped by reference, condition and junctions.
df <- bind_rows(nt, dox) %>%
  # Drop reference x condition combinations with fewer than min_reads reads
  group_by(reference, condition) %>%
  filter(sum(number_of_reads) >= min_reads) %>%
  ungroup() %>%
  # Keep only rows whose junction string contains the 1020-1114 junction
  mutate(RPS_spliced = str_detect(junctions, '1020-1114')) %>%
  filter(RPS_spliced) %>%
  # Productive = exactly the two expected junctions; the optimisation level
  # and attempt are the 3rd and 2nd "_"-separated fields of the reference name
  mutate(
    productive = junctions == '334-483;1020-1114',
    opt_level = word(reference, 3, sep='_'),
    attempt = word(reference, 2, sep='_')
  ) %>%
  group_by(reference, condition) %>%
  mutate(
    frac_productive =
      sum(ifelse(productive, number_of_reads, 0)) / sum(number_of_reads),
    frac_correct_donor =
      sum(ifelse(str_detect(junctions, '334-'), number_of_reads, 0)) /
        sum(number_of_reads)
  ) %>%
  # Regrouping replaces the previous grouping, so no ungroup() is needed first
  group_by(reference, condition, junctions) %>%
  mutate(n_this_junctions = sum(number_of_reads))

# Analyse what fraction use the correct donor

# One row per reference x condition carrying the donor-usage fraction
correct_donor_df <- df %>%
  ungroup() %>%
  distinct(reference, opt_level, condition, frac_correct_donor) %>%
  group_by(reference)

# Box plot of donor usage (as a percentage) per optimisation level, split by
# treatment
p1 <- correct_donor_df %>%
  ggplot(aes(x = opt_level, y = 100*frac_correct_donor, fill = condition)) +
  geom_boxplot() +
  theme_classic() +
  labs(
    x = 'Optimisation level',
    y = '% usage expected\ndonor splice site'
  ) +
  ggsci::scale_fill_npg() +
  ggeasy::easy_add_legend_title('Treatment')

# See if this holds if we exclude Intron Retention from the analysis

# Same pipeline as for `df`, but rows whose junction string is exactly
# '1020-1114' are removed first and no min_reads filter is applied
df_ignore_ir <- bind_rows(nt, dox) %>%
  filter(junctions != '1020-1114') %>%  # ignore IR
  mutate(RPS_spliced = str_detect(junctions, '1020-1114')) %>%
  filter(RPS_spliced) %>%
  mutate(
    productive = junctions == '334-483;1020-1114',
    opt_level = word(reference, 3, sep='_'),
    attempt = word(reference, 2, sep='_')
  ) %>%
  group_by(reference, condition) %>%
  mutate(
    frac_productive =
      sum(ifelse(productive, number_of_reads, 0)) / sum(number_of_reads),
    frac_correct_donor =
      sum(ifelse(str_detect(junctions, '334-'), number_of_reads, 0)) /
        sum(number_of_reads)
  ) %>%
  # Regrouping replaces the previous grouping, so no ungroup() is needed first
  group_by(reference, condition, junctions) %>%
  mutate(n_this_junctions = sum(number_of_reads))

# One row per reference x condition carrying the donor-usage fraction
# (intron-retention rows excluded upstream)
correct_donor_df_ignore_ir <- df_ignore_ir %>%
  ungroup() %>%
  distinct(reference, opt_level, condition, frac_correct_donor)

# frac_correct_donor is a 0-1 fraction, so it is scaled by 100 to agree with
# the "%" axis label and with the matching panel that includes IR
p2 <- ggplot(correct_donor_df_ignore_ir,
             aes(x = opt_level, y = 100*frac_correct_donor, fill = condition)) +
  geom_boxplot() +
  theme_classic() +
  ylab('% usage expected donor\nsplice site (ignoring IR)') +
  xlab('Optimisation level') +
  ggeasy::easy_add_legend_title('Treatment') +
  ggsci::scale_fill_npg()

# Analyse the fraction of productive transcripts. Does it increase with TDP-43 KD?

# One row per reference x condition, plus `increase` = shTDP fraction minus NT
# fraction for that reference. A reference seen in only one condition gets
# increase = NA; the earlier -1 sentinel turned such cases into spurious
# values of magnitude >= 1 that were then counted as real changes.
productive_df <- df %>%
  ungroup() %>%
  distinct(reference, opt_level, condition, frac_productive) %>%
  group_by(reference) %>%
  mutate(increase = if (any(condition == 'shTDP') && any(condition == 'NT')) {
    max(frac_productive[condition == 'shTDP']) -
      max(frac_productive[condition == 'NT'])
  } else {
    NA_real_
  })

# frac_productive is a 0-1 fraction, so it is scaled by 100 to agree with the
# "%" axis label
p3 <- ggplot(productive_df, aes(x = opt_level, y = 100*frac_productive, fill = condition)) +
  geom_boxplot() +
  theme_classic() +
  ggsci::scale_fill_npg() +
  xlab('Optimisation level') +
  ylab('% Productive transcripts') +
  ggeasy::easy_add_legend_title('Treatment')

# Percentage of references per optimisation level whose productive fraction
# rises by more than 0.50; references without a defined increase are left out
# of both the numerator and the denominator
increased_df <- productive_df %>%
  distinct(reference, opt_level, increase) %>%
  ungroup() %>%
  group_by(opt_level) %>%
  mutate(pc_that_increase = 100*sum(increase > 0.50, na.rm = TRUE) /
           n_distinct(reference[!is.na(increase)])) %>%
  distinct(opt_level, pc_that_increase)

# p4 <- ggplot(increased_df, aes(x = opt_level, y = pc_that_increase, fill = pc_that_increase)) + 
#   geom_bar(stat='identity') +
#   theme_classic() +
#   xlab('Optimisation level') +
#   ylab('% constructs >50% increase of\nproductive transcripts with TDP-43 KD') +
#   ggtitle('Response to TDP-43 KD') +
#   scale_fill_viridis_c() +
#   ggeasy::easy_remove_legend()

# Violin plot of the per-reference change in productive fraction (x100) for
# each optimisation level
p4 <- productive_df %>%
  distinct(reference, opt_level, increase) %>%
  ggplot(aes(x = factor(opt_level), y = 100*increase)) +
  geom_violin(scale = 'width', fill = 'grey50') +
  theme_classic() +
  labs(
    x = 'Optimisation level',
    y = '% increase in productive transcripts',
    title = 'Response to TDP-43 KD'
  )

# 2 x 2 panel layout with automatic A-D tags
(p1|p2)/(p3|p4) + plot_annotation(tag_levels = 'A')

ggsave(
  'markdown_images/better_score_better_splice/combined_bsbs_figs.pdf',
  height = 14, width = 17, units = 'cm'
)

Repeat of 12QN incucyte experiment

generate_full_image <- function(combined_positions, all_images, n_rows, 
                                n_columns=6, dim_image=200, spacing_images=5,
                                spacing_wells=30, background_quantile=0.97,
                                verbose=TRUE){
  
  # Tile Incucyte images into a single plate-layout montage.
  #
  # Arguments:
  #   combined_positions  data frame with columns `filename`, `well`, `Plate`
  #                       and `Order` (the montage row of the well), and
  #                       optionally `replicate` (which of the four images
  #                       within the well a file is)
  #   all_images          paths of the image files; files whose basename is
  #                       not in combined_positions$filename are skipped
  #   n_rows, n_columns   number of well rows / columns in the montage
  #   dim_image           side length in pixels each image is resized to
  #   spacing_images      gap in pixels between images of the same well
  #   spacing_wells       gap in pixels between neighbouring wells
  #   background_quantile quantile of the background-subtracted montage that
  #                       is used as the normalisation value
  #   verbose             if TRUE (default) print the per-image placement
  #                       values, as this function has always done
  #
  # Returns an inverted EBImage `Image` of the whole montage.
  #
  # Currently it only works when you have four images per well (stupidly
  # named "replicates"), laid out as a 2 x 2 block.
  
  well_size <- 2*dim_image + spacing_images   # side length of one 2 x 2 well block
  well_stride <- well_size + spacing_wells    # offset between neighbouring wells
  
  height <- n_rows*well_size + (n_rows-1)*spacing_wells + 1
  width <- n_columns*well_size + (n_columns-1)*spacing_wells + 1
  
  full_image <- matrix(nrow = height, ncol = width, 0)
  
  background <- 10000000 # set arbitrarily large initial value
  
  # Print only when verbose output was requested
  debug_print <- function(value){
    if(verbose){
      print(value)
    }
    invisible(NULL)
  }
  
  for(image in all_images){
    filename <- word(image, sep="/", start=-1, end = -1)
    
    if(!filename %in% combined_positions$filename){
      next
    }
    
    debug_print(filename)
    # Images whose path contains "control plate" belong to plate 2
    plate <- if(str_detect(image, "control plate")) "2" else "1"
    debug_print(plate)
    
    well <- word(filename, sep="_", start=2, end=2)
    debug_print(well)
    plate_column <- as.numeric(str_sub(well, 2, 3))
    debug_print(plate_column)
    
    # The plate column is used directly as the montage column
    #image_column = ifelse(plate_column %% 6 == 0, 6, plate_column %% 6)
    image_column <- plate_column
    
    image_row <- unique(combined_positions$Order[which(combined_positions$well == well & 
                                                         combined_positions$Plate == plate)])
    debug_print(image_row)
    
    # No row for this well/plate: skip the image (a zero-length lookup would
    # otherwise make the is.na() test below fail)
    if(length(image_row) == 0){
      next
    }
    if(length(image_row) > 1){
      stop("Well ", well, " on plate ", plate, " maps to more than one Order value",
           call. = FALSE)
    }
    if(is.na(image_row)){
      next
    }
    
    if(is.na(image_column) || image_column < 1 || image_column > n_columns ||
       image_row < 1 || image_row > n_rows){
      stop("Well ", well, " (row ", image_row, ", column ", image_column,
           ") does not fit a ", n_rows, " x ", n_columns, " montage",
           call. = FALSE)
    }
    
    if("replicate" %in% colnames(combined_positions)){
      replicate <- unique(combined_positions$replicate[which(combined_positions$filename == filename)])
    } else {
      replicate <- as.numeric(word(filename, sep="_", start=3, end=3))
    }
    
    if(length(replicate) != 1 || is.na(replicate)){
      stop("Could not determine a single replicate number for ", filename,
           call. = FALSE)
    }
    
    # Images 2 and 3 swap places within the 2 x 2 block
    if(replicate == 2){
      replicate <- 3
    } else if(replicate == 3){
      replicate <- 2
    }
    
    image_matrix <- readImage(image)
    smaller <- as.array(EBImage::resize(image_matrix, w=dim_image, h=dim_image))
    
    well_top_left_x <- (image_column - 1)*well_stride + 1
    debug_print('yo')
    debug_print(image_column)
    debug_print(well_top_left_x)
    
    # Replicates 2 and 4 go in the right-hand half of the block
    image_top_left_x <- well_top_left_x
    if(replicate %in% c(2,4)){
      image_top_left_x <- well_top_left_x + dim_image + spacing_images
    }
    debug_print(replicate)
    debug_print(image_top_left_x)
    
    well_top_left_y <- (image_row - 1)*well_stride + 1
    debug_print(well_top_left_y)
    
    # Replicates 3 and 4 go in the lower half of the block
    image_top_left_y <- well_top_left_y
    if(replicate %in% c(3,4)){
      image_top_left_y <- well_top_left_y + dim_image + spacing_images
    }
    debug_print(image_top_left_y)
    
    full_image[image_top_left_y:(image_top_left_y+dim_image-1),
               image_top_left_x:(image_top_left_x + dim_image-1)] <- smaller
    
    # Track the lowest 5th-percentile intensity seen in any image
    this_background <- quantile(smaller, 0.05)
    
    if(this_background < background){
      background <- this_background
    }
  }
  
  # Subtract the background, scale by the chosen quantile, transpose and invert
  full_image2 <- full_image - background
  img_invert <- Image(1 - t(full_image2/quantile(full_image2, background_quantile)))
  
  return(img_invert)
}

# Plate layout for the 12QN repeat: relabel 'SK-N-DZ' as 'SK-N-BE2', keep only
# that cell type, and split each coordinate into a row letter and column number
plate_data <- read_csv(paste0(data_dir, "/12qn repeat/plate_info.csv")) %>%
  mutate(cell_type = ifelse(cell_type == 'SK-N-DZ', 'SK-N-BE2', cell_type)) %>%
  filter(cell_type == 'SK-N-BE2') %>%
  # Take everything after the row letter: a single character would turn
  # columns 10-12 into 1 and mis-join those wells
  mutate(row = str_sub(coordinate, 1, 1),
         column = as.numeric(str_sub(coordinate, 2))) %>%
  mutate(replicate = as.numeric(str_sub(replicate, 2, 2))) %>%
  dplyr::rename(experimental_replicate = replicate)
## Rows: 96 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): coordinate, transfection, cell_type, replicate
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
red_12qn <- Sys.glob(paste0(data_dir, "/12qn repeat/orange/*"))
#red_12qn <- Sys.glob("~/Downloads/orange/*")

# One row per image file: the well and within-well image number come from the
# filename, the montage row (Order) from the row letter, and the plate
# metadata is joined on by row and column
positions <- data.frame(filename = word(red_12qn, -1, sep="/")) %>%
  mutate(well = word(filename, 2, sep="_"),
         replicate = as.numeric(word(filename, 3, sep="_"))) %>%
  # Parse the full column number here too, so wells in columns 10-12 join
  # to the correct plate_data rows
  mutate(row = str_sub(well, 1, 1),
         column = as.numeric(str_sub(well, 2))) %>%
  left_join(data.frame(row = c("A", "B", 'C', 'D', 'E', 'F', 'G', 'H'),
                       Order = c(1, 2, 3, 4, 5, 6, 7, 8))) %>%
  mutate(plate_column = column) %>%
  mutate(Plate = 1) %>%
  inner_join(plate_data) %>%
  #mutate(plate_column = plate_column - min(plate_column) + 1) %>%
  mutate(well = paste0(row, plate_column))
## Joining with `by = join_by(row)`
## Joining with `by = join_by(row, column)`
# Build the montage: 4 well rows x 10 well columns
im <- generate_full_image(
  combined_positions = positions,
  all_images = red_12qn,
  n_rows = 4,
  n_columns = 10,
  background_quantile = 0.995
)
## [1] "VID501_A5_1_2023y12m01d_13h16m.png"
## [1] "1"
## [1] "A5"
## [1] 5
## [1] 1
## [1] "yo"
## [1] 5
## [1] 1741
## [1] 1
## [1] 1741
## [1] 1
## [1] 1
## [1] "VID501_A5_2_2023y12m01d_13h16m.png"
## [1] "1"
## [1] "A5"
## [1] 5
## [1] 1
## [1] "yo"
## [1] 5
## [1] 1741
## [1] 3
## [1] 1741
## [1] 1
## [1] 206
## [1] "VID501_A5_3_2023y12m01d_13h16m.png"
## [1] "1"
## [1] "A5"
## [1] 5
## [1] 1
## [1] "yo"
## [1] 5
## [1] 1741
## [1] 2
## [1] 1946
## [1] 1
## [1] 1
## [1] "VID501_A5_4_2023y12m01d_13h16m.png"
## [1] "1"
## [1] "A5"
## [1] 5
## [1] 1
## [1] "yo"
## [1] 5
## [1] 1741
## [1] 4
## [1] 1946
## [1] 1
## [1] 206
## [1] "VID501_A6_1_2023y12m01d_13h16m.png"
## [1] "1"
## [1] "A6"
## [1] 6
## [1] 1
## [1] "yo"
## [1] 6
## [1] 2176
## [1] 1
## [1] 2176
## [1] 1
## [1] 1
## [1] "VID501_A6_2_2023y12m01d_13h16m.png"
## [1] "1"
## [1] "A6"
## [1] 6
## [1] 1
## [1] "yo"
## [1] 6
## [1] 2176
## [1] 3
## [1] 2176
## [1] 1
## [1] 206
## [1] "VID501_A6_3_2023y12m01d_13h16m.png"
## [1] "1"
## [1] "A6"
## [1] 6
## [1] 1
## [1] "yo"
## [1] 6
## [1] 2176
## [1] 2
## [1] 2381
## [1] 1
## [1] 1
## [1] "VID501_A6_4_2023y12m01d_13h16m.png"
## [1] "1"
## [1] "A6"
## [1] 6
## [1] 1
## [1] "yo"
## [1] 6
## [1] 2176
## [1] 4
## [1] 2381
## [1] 1
## [1] 206
## [1] "VID501_A7_1_2023y12m01d_13h16m.png"
## [1] "1"
## [1] "A7"
## [1] 7
## [1] 1
## [1] "yo"
## [1] 7
## [1] 2611
## [1] 1
## [1] 2611
## [1] 1
## [1] 1
## [1] "VID501_A7_2_2023y12m01d_13h16m.png"
## [1] "1"
## [1] "A7"
## [1] 7
## [1] 1
## [1] "yo"
## [1] 7
## [1] 2611
## [1] 3
## [1] 2611
## [1] 1
## [1] 206
## [1] "VID501_A7_3_2023y12m01d_13h16m.png"
## [1] "1"
## [1] "A7"
## [1] 7
## [1] 1
## [1] "yo"
## [1] 7
## [1] 2611
## [1] 2
## [1] 2816
## [1] 1
## [1] 1
## [1] "VID501_A7_4_2023y12m01d_13h16m.png"
## [1] "1"
## [1] "A7"
## [1] 7
## [1] 1
## [1] "yo"
## [1] 7
## [1] 2611
## [1] 4
## [1] 2816
## [1] 1
## [1] 206
## [1] "VID501_A8_1_2023y12m01d_13h16m.png"
## [1] "1"
## [1] "A8"
## [1] 8
## [1] 1
## [1] "yo"
## [1] 8
## [1] 3046
## [1] 1
## [1] 3046
## [1] 1
## [1] 1
## [1] "VID501_A8_2_2023y12m01d_13h16m.png"
## [1] "1"
## [1] "A8"
## [1] 8
## [1] 1
## [1] "yo"
## [1] 8
## [1] 3046
## [1] 3
## [1] 3046
## [1] 1
## [1] 206
## [1] "VID501_A8_3_2023y12m01d_13h16m.png"
## [1] "1"
## [1] "A8"
## [1] 8
## [1] 1
## [1] "yo"
## [1] 8
## [1] 3046
## [1] 2
## [1] 3251
## [1] 1
## [1] 1
## [1] "VID501_A8_4_2023y12m01d_13h16m.png"
## [1] "1"
## [1] "A8"
## [1] 8
## [1] 1
## [1] "yo"
## [1] 8
## [1] 3046
## [1] 4
## [1] 3251
## [1] 1
## [1] 206
## [1] "VID501_B5_1_2023y12m01d_13h16m.png"
## [1] "1"
## [1] "B5"
## [1] 5
## [1] 2
## [1] "yo"
## [1] 5
## [1] 1741
## [1] 1
## [1] 1741
## [1] 436
## [1] 436
## [1] "VID501_B5_2_2023y12m01d_13h16m.png"
## [1] "1"
## [1] "B5"
## [1] 5
## [1] 2
## [1] "yo"
## [1] 5
## [1] 1741
## [1] 3
## [1] 1741
## [1] 436
## [1] 641
## [1] "VID501_B5_3_2023y12m01d_13h16m.png"
## [1] "1"
## [1] "B5"
## [1] 5
## [1] 2
## [1] "yo"
## [1] 5
## [1] 1741
## [1] 2
## [1] 1946
## [1] 436
## [1] 436
## [1] "VID501_B5_4_2023y12m01d_13h16m.png"
## [1] "1"
## [1] "B5"
## [1] 5
## [1] 2
## [1] "yo"
## [1] 5
## [1] 1741
## [1] 4
## [1] 1946
## [1] 436
## [1] 641
## [1] "VID501_B6_1_2023y12m01d_13h16m.png"
## [1] "1"
## [1] "B6"
## [1] 6
## [1] 2
## [1] "yo"
## [1] 6
## [1] 2176
## [1] 1
## [1] 2176
## [1] 436
## [1] 436
## [1] "VID501_B6_2_2023y12m01d_13h16m.png"
## [1] "1"
## [1] "B6"
## [1] 6
## [1] 2
## [1] "yo"
## [1] 6
## [1] 2176
## [1] 3
## [1] 2176
## [1] 436
## [1] 641
## [1] "VID501_B6_3_2023y12m01d_13h16m.png"
## [1] "1"
## [1] "B6"
## [1] 6
## [1] 2
## [1] "yo"
## [1] 6
## [1] 2176
## [1] 2
## [1] 2381
## [1] 436
## [1] 436
## [1] "VID501_B6_4_2023y12m01d_13h16m.png"
## [1] "1"
## [1] "B6"
## [1] 6
## [1] 2
## [1] "yo"
## [1] 6
## [1] 2176
## [1] 4
## [1] 2381
## [1] 436
## [1] 641
## [1] "VID501_B7_1_2023y12m01d_13h16m.png"
## [1] "1"
## [1] "B7"
## [1] 7
## [1] 2
## [1] "yo"
## [1] 7
## [1] 2611
## [1] 1
## [1] 2611
## [1] 436
## [1] 436
## [1] "VID501_B7_2_2023y12m01d_13h16m.png"
## [1] "1"
## [1] "B7"
## [1] 7
## [1] 2
## [1] "yo"
## [1] 7
## [1] 2611
## [1] 3
## [1] 2611
## [1] 436
## [1] 641
## [1] "VID501_B7_3_2023y12m01d_13h16m.png"
## [1] "1"
## [1] "B7"
## [1] 7
## [1] 2
## [1] "yo"
## [1] 7
## [1] 2611
## [1] 2
## [1] 2816
## [1] 436
## [1] 436
## [1] "VID501_B7_4_2023y12m01d_13h16m.png"
## [1] "1"
## [1] "B7"
## [1] 7
## [1] 2
## [1] "yo"
## [1] 7
## [1] 2611
## [1] 4
## [1] 2816
## [1] 436
## [1] 641
## [1] "VID501_B8_1_2023y12m01d_13h16m.png"
## [1] "1"
## [1] "B8"
## [1] 8
## [1] 2
## [1] "yo"
## [1] 8
## [1] 3046
## [1] 1
## [1] 3046
## [1] 436
## [1] 436
## [1] "VID501_B8_2_2023y12m01d_13h16m.png"
## [1] "1"
## [1] "B8"
## [1] 8
## [1] 2
## [1] "yo"
## [1] 8
## [1] 3046
## [1] 3
## [1] 3046
## [1] 436
## [1] 641
## [1] "VID501_B8_3_2023y12m01d_13h16m.png"
## [1] "1"
## [1] "B8"
## [1] 8
## [1] 2
## [1] "yo"
## [1] 8
## [1] 3046
## [1] 2
## [1] 3251
## [1] 436
## [1] 436
## [1] "VID501_B8_4_2023y12m01d_13h16m.png"
## [1] "1"
## [1] "B8"
## [1] 8
## [1] 2
## [1] "yo"
## [1] 8
## [1] 3046
## [1] 4
## [1] 3251
## [1] 436
## [1] 641
## [1] "VID501_C5_1_2023y12m01d_13h16m.png"
## [1] "1"
## [1] "C5"
## [1] 5
## [1] 3
## [1] "yo"
## [1] 5
## [1] 1741
## [1] 1
## [1] 1741
## [1] 871
## [1] 871
## [1] "VID501_C5_2_2023y12m01d_13h16m.png"
## [1] "1"
## [1] "C5"
## [1] 5
## [1] 3
## [1] "yo"
## [1] 5
## [1] 1741
## [1] 3
## [1] 1741
## [1] 871
## [1] 1076
## [1] "VID501_C5_3_2023y12m01d_13h16m.png"
## [1] "1"
## [1] "C5"
## [1] 5
## [1] 3
## [1] "yo"
## [1] 5
## [1] 1741
## [1] 2
## [1] 1946
## [1] 871
## [1] 871
## [1] "VID501_C5_4_2023y12m01d_13h16m.png"
## [1] "1"
## [1] "C5"
## [1] 5
## [1] 3
## [1] "yo"
## [1] 5
## [1] 1741
## [1] 4
## [1] 1946
## [1] 871
## [1] 1076
## [1] "VID501_C6_1_2023y12m01d_13h16m.png"
## [1] "1"
## [1] "C6"
## [1] 6
## [1] 3
## [1] "yo"
## [1] 6
## [1] 2176
## [1] 1
## [1] 2176
## [1] 871
## [1] 871
## [1] "VID501_C6_2_2023y12m01d_13h16m.png"
## [1] "1"
## [1] "C6"
## [1] 6
## [1] 3
## [1] "yo"
## [1] 6
## [1] 2176
## [1] 3
## [1] 2176
## [1] 871
## [1] 1076
## [1] "VID501_C6_3_2023y12m01d_13h16m.png"
## [1] "1"
## [1] "C6"
## [1] 6
## [1] 3
## [1] "yo"
## [1] 6
## [1] 2176
## [1] 2
## [1] 2381
## [1] 871
## [1] 871
## [1] "VID501_C6_4_2023y12m01d_13h16m.png"
## [1] "1"
## [1] "C6"
## [1] 6
## [1] 3
## [1] "yo"
## [1] 6
## [1] 2176
## [1] 4
## [1] 2381
## [1] 871
## [1] 1076
## [1] "VID501_C7_1_2023y12m01d_13h16m.png"
## [1] "1"
## [1] "C7"
## [1] 7
## [1] 3
## [1] "yo"
## [1] 7
## [1] 2611
## [1] 1
## [1] 2611
## [1] 871
## [1] 871
## [1] "VID501_C7_2_2023y12m01d_13h16m.png"
## [1] "1"
## [1] "C7"
## [1] 7
## [1] 3
## [1] "yo"
## [1] 7
## [1] 2611
## [1] 3
## [1] 2611
## [1] 871
## [1] 1076
## [1] "VID501_C7_3_2023y12m01d_13h16m.png"
## [1] "1"
## [1] "C7"
## [1] 7
## [1] 3
## [1] "yo"
## [1] 7
## [1] 2611
## [1] 2
## [1] 2816
## [1] 871
## [1] 871
## [1] "VID501_C7_4_2023y12m01d_13h16m.png"
## [1] "1"
## [1] "C7"
## [1] 7
## [1] 3
## [1] "yo"
## [1] 7
## [1] 2611
## [1] 4
## [1] 2816
## [1] 871
## [1] 1076
## [1] "VID501_C8_1_2023y12m01d_13h16m.png"
## [1] "1"
## [1] "C8"
## [1] 8
## [1] 3
## [1] "yo"
## [1] 8
## [1] 3046
## [1] 1
## [1] 3046
## [1] 871
## [1] 871
## [1] "VID501_C8_2_2023y12m01d_13h16m.png"
## [1] "1"
## [1] "C8"
## [1] 8
## [1] 3
## [1] "yo"
## [1] 8
## [1] 3046
## [1] 3
## [1] 3046
## [1] 871
## [1] 1076
## [1] "VID501_C8_3_2023y12m01d_13h16m.png"
## [1] "1"
## [1] "C8"
## [1] 8
## [1] 3
## [1] "yo"
## [1] 8
## [1] 3046
## [1] 2
## [1] 3251
## [1] 871
## [1] 871
## [1] "VID501_C8_4_2023y12m01d_13h16m.png"
## [1] "1"
## [1] "C8"
## [1] 8
## [1] 3
## [1] "yo"
## [1] 8
## [1] 3046
## [1] 4
## [1] 3251
## [1] 871
## [1] 1076
## [1] "VID501_D5_1_2023y12m01d_13h16m.png"
## [1] "1"
## [1] "D5"
## [1] 5
## [1] 4
## [1] "yo"
## [1] 5
## [1] 1741
## [1] 1
## [1] 1741
## [1] 1306
## [1] 1306
## [1] "VID501_D5_2_2023y12m01d_13h16m.png"
## [1] "1"
## [1] "D5"
## [1] 5
## [1] 4
## [1] "yo"
## [1] 5
## [1] 1741
## [1] 3
## [1] 1741
## [1] 1306
## [1] 1511
## [1] "VID501_D5_3_2023y12m01d_13h16m.png"
## [1] "1"
## [1] "D5"
## [1] 5
## [1] 4
## [1] "yo"
## [1] 5
## [1] 1741
## [1] 2
## [1] 1946
## [1] 1306
## [1] 1306
## [1] "VID501_D5_4_2023y12m01d_13h16m.png"
## [1] "1"
## [1] "D5"
## [1] 5
## [1] 4
## [1] "yo"
## [1] 5
## [1] 1741
## [1] 4
## [1] 1946
## [1] 1306
## [1] 1511
## [1] "VID501_D6_1_2023y12m01d_13h16m.png"
## [1] "1"
## [1] "D6"
## [1] 6
## [1] 4
## [1] "yo"
## [1] 6
## [1] 2176
## [1] 1
## [1] 2176
## [1] 1306
## [1] 1306
## [1] "VID501_D6_2_2023y12m01d_13h16m.png"
## [1] "1"
## [1] "D6"
## [1] 6
## [1] 4
## [1] "yo"
## [1] 6
## [1] 2176
## [1] 3
## [1] 2176
## [1] 1306
## [1] 1511
## [1] "VID501_D6_3_2023y12m01d_13h16m.png"
## [1] "1"
## [1] "D6"
## [1] 6
## [1] 4
## [1] "yo"
## [1] 6
## [1] 2176
## [1] 2
## [1] 2381
## [1] 1306
## [1] 1306
## [1] "VID501_D6_4_2023y12m01d_13h16m.png"
## [1] "1"
## [1] "D6"
## [1] 6
## [1] 4
## [1] "yo"
## [1] 6
## [1] 2176
## [1] 4
## [1] 2381
## [1] 1306
## [1] 1511
## [1] "VID501_D7_1_2023y12m01d_13h16m.png"
## [1] "1"
## [1] "D7"
## [1] 7
## [1] 4
## [1] "yo"
## [1] 7
## [1] 2611
## [1] 1
## [1] 2611
## [1] 1306
## [1] 1306
## [1] "VID501_D7_2_2023y12m01d_13h16m.png"
## [1] "1"
## [1] "D7"
## [1] 7
## [1] 4
## [1] "yo"
## [1] 7
## [1] 2611
## [1] 3
## [1] 2611
## [1] 1306
## [1] 1511
## [1] "VID501_D7_3_2023y12m01d_13h16m.png"
## [1] "1"
## [1] "D7"
## [1] 7
## [1] 4
## [1] "yo"
## [1] 7
## [1] 2611
## [1] 2
## [1] 2816
## [1] 1306
## [1] 1306
## [1] "VID501_D7_4_2023y12m01d_13h16m.png"
## [1] "1"
## [1] "D7"
## [1] 7
## [1] 4
## [1] "yo"
## [1] 7
## [1] 2611
## [1] 4
## [1] 2816
## [1] 1306
## [1] 1511
## [1] "VID501_D8_1_2023y12m01d_13h16m.png"
## [1] "1"
## [1] "D8"
## [1] 8
## [1] 4
## [1] "yo"
## [1] 8
## [1] 3046
## [1] 1
## [1] 3046
## [1] 1306
## [1] 1306
## [1] "VID501_D8_2_2023y12m01d_13h16m.png"
## [1] "1"
## [1] "D8"
## [1] 8
## [1] 4
## [1] "yo"
## [1] 8
## [1] 3046
## [1] 3
## [1] 3046
## [1] 1306
## [1] 1511
## [1] "VID501_D8_3_2023y12m01d_13h16m.png"
## [1] "1"
## [1] "D8"
## [1] 8
## [1] 4
## [1] "yo"
## [1] 8
## [1] 3046
## [1] 2
## [1] 3251
## [1] 1306
## [1] 1306
## [1] "VID501_D8_4_2023y12m01d_13h16m.png"
## [1] "1"
## [1] "D8"
## [1] 8
## [1] 4
## [1] "yo"
## [1] 8
## [1] 3046
## [1] 4
## [1] 3251
## [1] 1306
## [1] 1511
# Put the montage into the red channel only, at half intensity, and show it.
rgb_red <- rgbImage(
  red = 0.5 * (1 - im),
  green = NULL,
  blue = NULL
)
display(rgb_red)

# Save the uncropped montage with the other 12QN repeat figures.
writeImage(
  rgb_red,
  "markdown_images/12QN repeat/uncropped_mScarlet.png",
  quality = 90
)




# Green-channel images of the 12QN repeat experiment.
green_12qn <- Sys.glob(paste0(data_dir, "/12qn repeat/green/*"))

# One row per image file. File names look like
# "VID501_A5_1_2023y12m01d_13h16m.png": the 2nd "_"-separated field is the
# well and the 3rd is the image number within the well.
positions <- data.frame(filename = word(green_12qn, -1, sep="/")) %>%
  mutate(well = word(filename, 2, sep="_"),
         replicate = as.numeric(word(filename, 3, sep="_"))) %>%
  mutate(row = str_sub(well, 1, 1),
         # Read up to two digits so that columns 10-12 are not truncated to 1.
         column = as.numeric(str_sub(well, 2, 3))) %>%
  # Order is the montage row: plate row A -> 1, B -> 2, ...
  left_join(data.frame(row = c("A", "B", 'C', 'D', 'E', 'F', 'G', 'H'),
                       Order = c(1, 2, 3, 4, 5, 6, 7, 8))) %>%
  mutate(plate_column = column) %>%
  mutate(Plate = 1) %>%
  # Keep only the wells that are described in plate_data.
  inner_join(plate_data) %>%
  #mutate(plate_column = plate_column - min(plate_column) + 1) %>%
  mutate(well = paste0(row, plate_column))
## Joining with `by = join_by(row)`
## Joining with `by = join_by(row, column)`
# Assemble the green-channel montage on a 4-row x 10-column grid of wells.
im_green <- generate_full_image(
  positions,
  green_12qn,
  n_rows = 4,
  n_columns = 10,
  background_quantile = 1
)
## [1] "VID501_A5_1_2023y12m01d_13h16m.png"
## [1] "1"
## [1] "A5"
## [1] 5
## [1] 1
## [1] "yo"
## [1] 5
## [1] 1741
## [1] 1
## [1] 1741
## [1] 1
## [1] 1
## [1] "VID501_A5_2_2023y12m01d_13h16m.png"
## [1] "1"
## [1] "A5"
## [1] 5
## [1] 1
## [1] "yo"
## [1] 5
## [1] 1741
## [1] 3
## [1] 1741
## [1] 1
## [1] 206
## [1] "VID501_A5_3_2023y12m01d_13h16m.png"
## [1] "1"
## [1] "A5"
## [1] 5
## [1] 1
## [1] "yo"
## [1] 5
## [1] 1741
## [1] 2
## [1] 1946
## [1] 1
## [1] 1
## [1] "VID501_A5_4_2023y12m01d_13h16m.png"
## [1] "1"
## [1] "A5"
## [1] 5
## [1] 1
## [1] "yo"
## [1] 5
## [1] 1741
## [1] 4
## [1] 1946
## [1] 1
## [1] 206
## [1] "VID501_A6_1_2023y12m01d_13h16m.png"
## [1] "1"
## [1] "A6"
## [1] 6
## [1] 1
## [1] "yo"
## [1] 6
## [1] 2176
## [1] 1
## [1] 2176
## [1] 1
## [1] 1
## [1] "VID501_A6_2_2023y12m01d_13h16m.png"
## [1] "1"
## [1] "A6"
## [1] 6
## [1] 1
## [1] "yo"
## [1] 6
## [1] 2176
## [1] 3
## [1] 2176
## [1] 1
## [1] 206
## [1] "VID501_A6_3_2023y12m01d_13h16m.png"
## [1] "1"
## [1] "A6"
## [1] 6
## [1] 1
## [1] "yo"
## [1] 6
## [1] 2176
## [1] 2
## [1] 2381
## [1] 1
## [1] 1
## [1] "VID501_A6_4_2023y12m01d_13h16m.png"
## [1] "1"
## [1] "A6"
## [1] 6
## [1] 1
## [1] "yo"
## [1] 6
## [1] 2176
## [1] 4
## [1] 2381
## [1] 1
## [1] 206
## [1] "VID501_A7_1_2023y12m01d_13h16m.png"
## [1] "1"
## [1] "A7"
## [1] 7
## [1] 1
## [1] "yo"
## [1] 7
## [1] 2611
## [1] 1
## [1] 2611
## [1] 1
## [1] 1
## [1] "VID501_A7_2_2023y12m01d_13h16m.png"
## [1] "1"
## [1] "A7"
## [1] 7
## [1] 1
## [1] "yo"
## [1] 7
## [1] 2611
## [1] 3
## [1] 2611
## [1] 1
## [1] 206
## [1] "VID501_A7_3_2023y12m01d_13h16m.png"
## [1] "1"
## [1] "A7"
## [1] 7
## [1] 1
## [1] "yo"
## [1] 7
## [1] 2611
## [1] 2
## [1] 2816
## [1] 1
## [1] 1
## [1] "VID501_A7_4_2023y12m01d_13h16m.png"
## [1] "1"
## [1] "A7"
## [1] 7
## [1] 1
## [1] "yo"
## [1] 7
## [1] 2611
## [1] 4
## [1] 2816
## [1] 1
## [1] 206
## [1] "VID501_A8_1_2023y12m01d_13h16m.png"
## [1] "1"
## [1] "A8"
## [1] 8
## [1] 1
## [1] "yo"
## [1] 8
## [1] 3046
## [1] 1
## [1] 3046
## [1] 1
## [1] 1
## [1] "VID501_A8_2_2023y12m01d_13h16m.png"
## [1] "1"
## [1] "A8"
## [1] 8
## [1] 1
## [1] "yo"
## [1] 8
## [1] 3046
## [1] 3
## [1] 3046
## [1] 1
## [1] 206
## [1] "VID501_A8_3_2023y12m01d_13h16m.png"
## [1] "1"
## [1] "A8"
## [1] 8
## [1] 1
## [1] "yo"
## [1] 8
## [1] 3046
## [1] 2
## [1] 3251
## [1] 1
## [1] 1
## [1] "VID501_A8_4_2023y12m01d_13h16m.png"
## [1] "1"
## [1] "A8"
## [1] 8
## [1] 1
## [1] "yo"
## [1] 8
## [1] 3046
## [1] 4
## [1] 3251
## [1] 1
## [1] 206
## [1] "VID501_B5_1_2023y12m01d_13h16m.png"
## [1] "1"
## [1] "B5"
## [1] 5
## [1] 2
## [1] "yo"
## [1] 5
## [1] 1741
## [1] 1
## [1] 1741
## [1] 436
## [1] 436
## [1] "VID501_B5_2_2023y12m01d_13h16m.png"
## [1] "1"
## [1] "B5"
## [1] 5
## [1] 2
## [1] "yo"
## [1] 5
## [1] 1741
## [1] 3
## [1] 1741
## [1] 436
## [1] 641
## [1] "VID501_B5_3_2023y12m01d_13h16m.png"
## [1] "1"
## [1] "B5"
## [1] 5
## [1] 2
## [1] "yo"
## [1] 5
## [1] 1741
## [1] 2
## [1] 1946
## [1] 436
## [1] 436
## [1] "VID501_B5_4_2023y12m01d_13h16m.png"
## [1] "1"
## [1] "B5"
## [1] 5
## [1] 2
## [1] "yo"
## [1] 5
## [1] 1741
## [1] 4
## [1] 1946
## [1] 436
## [1] 641
## [1] "VID501_B6_1_2023y12m01d_13h16m.png"
## [1] "1"
## [1] "B6"
## [1] 6
## [1] 2
## [1] "yo"
## [1] 6
## [1] 2176
## [1] 1
## [1] 2176
## [1] 436
## [1] 436
## [1] "VID501_B6_2_2023y12m01d_13h16m.png"
## [1] "1"
## [1] "B6"
## [1] 6
## [1] 2
## [1] "yo"
## [1] 6
## [1] 2176
## [1] 3
## [1] 2176
## [1] 436
## [1] 641
## [1] "VID501_B6_3_2023y12m01d_13h16m.png"
## [1] "1"
## [1] "B6"
## [1] 6
## [1] 2
## [1] "yo"
## [1] 6
## [1] 2176
## [1] 2
## [1] 2381
## [1] 436
## [1] 436
## [1] "VID501_B6_4_2023y12m01d_13h16m.png"
## [1] "1"
## [1] "B6"
## [1] 6
## [1] 2
## [1] "yo"
## [1] 6
## [1] 2176
## [1] 4
## [1] 2381
## [1] 436
## [1] 641
## [1] "VID501_B7_1_2023y12m01d_13h16m.png"
## [1] "1"
## [1] "B7"
## [1] 7
## [1] 2
## [1] "yo"
## [1] 7
## [1] 2611
## [1] 1
## [1] 2611
## [1] 436
## [1] 436
## [1] "VID501_B7_2_2023y12m01d_13h16m.png"
## [1] "1"
## [1] "B7"
## [1] 7
## [1] 2
## [1] "yo"
## [1] 7
## [1] 2611
## [1] 3
## [1] 2611
## [1] 436
## [1] 641
## [1] "VID501_B7_3_2023y12m01d_13h16m.png"
## [1] "1"
## [1] "B7"
## [1] 7
## [1] 2
## [1] "yo"
## [1] 7
## [1] 2611
## [1] 2
## [1] 2816
## [1] 436
## [1] 436
## [1] "VID501_B7_4_2023y12m01d_13h16m.png"
## [1] "1"
## [1] "B7"
## [1] 7
## [1] 2
## [1] "yo"
## [1] 7
## [1] 2611
## [1] 4
## [1] 2816
## [1] 436
## [1] 641
## [1] "VID501_B8_1_2023y12m01d_13h16m.png"
## [1] "1"
## [1] "B8"
## [1] 8
## [1] 2
## [1] "yo"
## [1] 8
## [1] 3046
## [1] 1
## [1] 3046
## [1] 436
## [1] 436
## [1] "VID501_B8_2_2023y12m01d_13h16m.png"
## [1] "1"
## [1] "B8"
## [1] 8
## [1] 2
## [1] "yo"
## [1] 8
## [1] 3046
## [1] 3
## [1] 3046
## [1] 436
## [1] 641
## [1] "VID501_B8_3_2023y12m01d_13h16m.png"
## [1] "1"
## [1] "B8"
## [1] 8
## [1] 2
## [1] "yo"
## [1] 8
## [1] 3046
## [1] 2
## [1] 3251
## [1] 436
## [1] 436
## [1] "VID501_B8_4_2023y12m01d_13h16m.png"
## [1] "1"
## [1] "B8"
## [1] 8
## [1] 2
## [1] "yo"
## [1] 8
## [1] 3046
## [1] 4
## [1] 3251
## [1] 436
## [1] 641
## [1] "VID501_C5_1_2023y12m01d_13h16m.png"
## [1] "1"
## [1] "C5"
## [1] 5
## [1] 3
## [1] "yo"
## [1] 5
## [1] 1741
## [1] 1
## [1] 1741
## [1] 871
## [1] 871
## [1] "VID501_C5_2_2023y12m01d_13h16m.png"
## [1] "1"
## [1] "C5"
## [1] 5
## [1] 3
## [1] "yo"
## [1] 5
## [1] 1741
## [1] 3
## [1] 1741
## [1] 871
## [1] 1076
## [1] "VID501_C5_3_2023y12m01d_13h16m.png"
## [1] "1"
## [1] "C5"
## [1] 5
## [1] 3
## [1] "yo"
## [1] 5
## [1] 1741
## [1] 2
## [1] 1946
## [1] 871
## [1] 871
## [1] "VID501_C5_4_2023y12m01d_13h16m.png"
## [1] "1"
## [1] "C5"
## [1] 5
## [1] 3
## [1] "yo"
## [1] 5
## [1] 1741
## [1] 4
## [1] 1946
## [1] 871
## [1] 1076
## [1] "VID501_C6_1_2023y12m01d_13h16m.png"
## [1] "1"
## [1] "C6"
## [1] 6
## [1] 3
## [1] "yo"
## [1] 6
## [1] 2176
## [1] 1
## [1] 2176
## [1] 871
## [1] 871
## [1] "VID501_C6_2_2023y12m01d_13h16m.png"
## [1] "1"
## [1] "C6"
## [1] 6
## [1] 3
## [1] "yo"
## [1] 6
## [1] 2176
## [1] 3
## [1] 2176
## [1] 871
## [1] 1076
## [1] "VID501_C6_3_2023y12m01d_13h16m.png"
## [1] "1"
## [1] "C6"
## [1] 6
## [1] 3
## [1] "yo"
## [1] 6
## [1] 2176
## [1] 2
## [1] 2381
## [1] 871
## [1] 871
## [1] "VID501_C6_4_2023y12m01d_13h16m.png"
## [1] "1"
## [1] "C6"
## [1] 6
## [1] 3
## [1] "yo"
## [1] 6
## [1] 2176
## [1] 4
## [1] 2381
## [1] 871
## [1] 1076
## [1] "VID501_C7_1_2023y12m01d_13h16m.png"
## [1] "1"
## [1] "C7"
## [1] 7
## [1] 3
## [1] "yo"
## [1] 7
## [1] 2611
## [1] 1
## [1] 2611
## [1] 871
## [1] 871
## [1] "VID501_C7_2_2023y12m01d_13h16m.png"
## [1] "1"
## [1] "C7"
## [1] 7
## [1] 3
## [1] "yo"
## [1] 7
## [1] 2611
## [1] 3
## [1] 2611
## [1] 871
## [1] 1076
## [1] "VID501_C7_3_2023y12m01d_13h16m.png"
## [1] "1"
## [1] "C7"
## [1] 7
## [1] 3
## [1] "yo"
## [1] 7
## [1] 2611
## [1] 2
## [1] 2816
## [1] 871
## [1] 871
## [1] "VID501_C7_4_2023y12m01d_13h16m.png"
## [1] "1"
## [1] "C7"
## [1] 7
## [1] 3
## [1] "yo"
## [1] 7
## [1] 2611
## [1] 4
## [1] 2816
## [1] 871
## [1] 1076
## [1] "VID501_C8_1_2023y12m01d_13h16m.png"
## [1] "1"
## [1] "C8"
## [1] 8
## [1] 3
## [1] "yo"
## [1] 8
## [1] 3046
## [1] 1
## [1] 3046
## [1] 871
## [1] 871
## [1] "VID501_C8_2_2023y12m01d_13h16m.png"
## [1] "1"
## [1] "C8"
## [1] 8
## [1] 3
## [1] "yo"
## [1] 8
## [1] 3046
## [1] 3
## [1] 3046
## [1] 871
## [1] 1076
## [1] "VID501_C8_3_2023y12m01d_13h16m.png"
## [1] "1"
## [1] "C8"
## [1] 8
## [1] 3
## [1] "yo"
## [1] 8
## [1] 3046
## [1] 2
## [1] 3251
## [1] 871
## [1] 871
## [1] "VID501_C8_4_2023y12m01d_13h16m.png"
## [1] "1"
## [1] "C8"
## [1] 8
## [1] 3
## [1] "yo"
## [1] 8
## [1] 3046
## [1] 4
## [1] 3251
## [1] 871
## [1] 1076
## [1] "VID501_D5_1_2023y12m01d_13h16m.png"
## [1] "1"
## [1] "D5"
## [1] 5
## [1] 4
## [1] "yo"
## [1] 5
## [1] 1741
## [1] 1
## [1] 1741
## [1] 1306
## [1] 1306
## [1] "VID501_D5_2_2023y12m01d_13h16m.png"
## [1] "1"
## [1] "D5"
## [1] 5
## [1] 4
## [1] "yo"
## [1] 5
## [1] 1741
## [1] 3
## [1] 1741
## [1] 1306
## [1] 1511
## [1] "VID501_D5_3_2023y12m01d_13h16m.png"
## [1] "1"
## [1] "D5"
## [1] 5
## [1] 4
## [1] "yo"
## [1] 5
## [1] 1741
## [1] 2
## [1] 1946
## [1] 1306
## [1] 1306
## [1] "VID501_D5_4_2023y12m01d_13h16m.png"
## [1] "1"
## [1] "D5"
## [1] 5
## [1] 4
## [1] "yo"
## [1] 5
## [1] 1741
## [1] 4
## [1] 1946
## [1] 1306
## [1] 1511
## [1] "VID501_D6_1_2023y12m01d_13h16m.png"
## [1] "1"
## [1] "D6"
## [1] 6
## [1] 4
## [1] "yo"
## [1] 6
## [1] 2176
## [1] 1
## [1] 2176
## [1] 1306
## [1] 1306
## [1] "VID501_D6_2_2023y12m01d_13h16m.png"
## [1] "1"
## [1] "D6"
## [1] 6
## [1] 4
## [1] "yo"
## [1] 6
## [1] 2176
## [1] 3
## [1] 2176
## [1] 1306
## [1] 1511
## [1] "VID501_D6_3_2023y12m01d_13h16m.png"
## [1] "1"
## [1] "D6"
## [1] 6
## [1] 4
## [1] "yo"
## [1] 6
## [1] 2176
## [1] 2
## [1] 2381
## [1] 1306
## [1] 1306
## [1] "VID501_D6_4_2023y12m01d_13h16m.png"
## [1] "1"
## [1] "D6"
## [1] 6
## [1] 4
## [1] "yo"
## [1] 6
## [1] 2176
## [1] 4
## [1] 2381
## [1] 1306
## [1] 1511
## [1] "VID501_D7_1_2023y12m01d_13h16m.png"
## [1] "1"
## [1] "D7"
## [1] 7
## [1] 4
## [1] "yo"
## [1] 7
## [1] 2611
## [1] 1
## [1] 2611
## [1] 1306
## [1] 1306
## [1] "VID501_D7_2_2023y12m01d_13h16m.png"
## [1] "1"
## [1] "D7"
## [1] 7
## [1] 4
## [1] "yo"
## [1] 7
## [1] 2611
## [1] 3
## [1] 2611
## [1] 1306
## [1] 1511
## [1] "VID501_D7_3_2023y12m01d_13h16m.png"
## [1] "1"
## [1] "D7"
## [1] 7
## [1] 4
## [1] "yo"
## [1] 7
## [1] 2611
## [1] 2
## [1] 2816
## [1] 1306
## [1] 1306
## [1] "VID501_D7_4_2023y12m01d_13h16m.png"
## [1] "1"
## [1] "D7"
## [1] 7
## [1] 4
## [1] "yo"
## [1] 7
## [1] 2611
## [1] 4
## [1] 2816
## [1] 1306
## [1] 1511
## [1] "VID501_D8_1_2023y12m01d_13h16m.png"
## [1] "1"
## [1] "D8"
## [1] 8
## [1] 4
## [1] "yo"
## [1] 8
## [1] 3046
## [1] 1
## [1] 3046
## [1] 1306
## [1] 1306
## [1] "VID501_D8_2_2023y12m01d_13h16m.png"
## [1] "1"
## [1] "D8"
## [1] 8
## [1] 4
## [1] "yo"
## [1] 8
## [1] 3046
## [1] 3
## [1] 3046
## [1] 1306
## [1] 1511
## [1] "VID501_D8_3_2023y12m01d_13h16m.png"
## [1] "1"
## [1] "D8"
## [1] 8
## [1] 4
## [1] "yo"
## [1] 8
## [1] 3046
## [1] 2
## [1] 3251
## [1] 1306
## [1] 1306
## [1] "VID501_D8_4_2023y12m01d_13h16m.png"
## [1] "1"
## [1] "D8"
## [1] 8
## [1] 4
## [1] "yo"
## [1] 8
## [1] 3046
## [1] 4
## [1] 3251
## [1] 1306
## [1] 1511
# Put the montage into the green channel only and show it.
rgb_green <- rgbImage(
  red = NULL,
  green = 1 - im_green,
  blue = NULL
)
display(rgb_green)

# Save the uncropped montage with the other 12QN repeat figures.
writeImage(
  rgb_green,
  "markdown_images/12QN repeat/uncropped_green.png",
  quality = 90
)

And now with HEK cells

generate_full_image <- function(combined_positions, all_images, n_rows, 
                                n_columns=6, dim_image=200, spacing_images=5,
                                spacing_wells=30, background_quantile=0.97,
                                verbose=TRUE){
  
  # Builds a single montage from Incucyte images, laid out like the plate.
  # Every well is drawn as a 2x2 block of tiles; tiles inside a well are
  # separated by `spacing_images` pixels and wells by `spacing_wells` pixels.
  #
  # Arguments:
  #   combined_positions  data frame with one row per image. Must contain
  #                       filename, well, column (montage column), Order
  #                       (montage row) and Plate. If it has a `replicate`
  #                       column (1-4) that is used; otherwise the replicate is
  #                       parsed from the 3rd "_"-separated field of the file name.
  #   all_images          character vector of image paths; files whose name is
  #                       not listed in combined_positions$filename are skipped.
  #   n_rows, n_columns   number of well rows / columns in the montage.
  #   dim_image           side length (pixels) each image is resized to.
  #   background_quantile quantile of the background-subtracted montage that is
  #                       used as the scaling denominator.
  #   verbose             if TRUE (default) print the placement values of every
  #                       tile, as the function always did; FALSE silences them.
  #
  # Returns an EBImage Image holding 1 - (scaled, background-subtracted montage).
  #
  # Currently it only works when you have four images per well (stupidly named "replicates")
  
  debug_print <- function(x) if(verbose) print(x)
  
  well_size <- 2*dim_image + spacing_images   # a well is two tiles plus one gap
  well_stride <- well_size + spacing_wells    # offset between neighbouring wells
  tile_stride <- dim_image + spacing_images   # offset of the 2nd tile in a well
  
  height <- n_rows*well_size + (n_rows-1)*spacing_wells + 1
  width <- n_columns*well_size + (n_columns-1)*spacing_wells + 1
  
  full_image <- matrix(nrow = height, ncol = width, 0)
  
  background <- 10000000 # set arbitrarily large initial values
  
  for(image in all_images){
    this_filename <- word(image, sep="/", start=-1, end = -1)
    
    this_data <- combined_positions %>% filter(filename == this_filename)
    
    # Images without metadata are not part of the montage.
    if(nrow(this_data) == 0){
      next
    }
    
    # Images stored under a "control plate" directory belong to plate 2.
    plate <- if(str_detect(image, "control plate")) "2" else "1"
    debug_print(plate)
    
    well <- this_data$well[1]
    image_column <- this_data$column[1]
    
    image_row <- unique(combined_positions$Order[which(combined_positions$well == well & 
                                                         combined_positions$Plate == plate)])
    debug_print(image_row)
    
    # No (or an unknown) montage row for this well/plate pair: skip the image
    # instead of failing on a zero-length condition.
    if(length(image_row) == 0 || is.na(image_row[1])){
      next
    }
    image_row <- image_row[1]
    
    if("replicate" %in% colnames(combined_positions)){
      # [1] keeps this a scalar even if a file name is listed more than once.
      replicate <- this_data$replicate[1]
    } else {
      replicate <- as.numeric(word(this_filename, sep="_", start=3, end=3))
    }
    
    # Swap images 2 and 3 so that they trade places inside the 2x2 block.
    if(replicate == 2){
      replicate <- 3
    } else if(replicate == 3){
      replicate <- 2
    }
    
    image_matrix <- readImage(image)
    smaller <- as.array(EBImage::resize(image_matrix, w=dim_image, h=dim_image))
    
    well_top_left_x <- (image_column - 1)*well_stride + 1
    debug_print('yo')
    debug_print(image_column)
    debug_print(well_top_left_x)
    
    # Replicates 2 and 4 go in the right-hand half of the well ...
    image_top_left_x <- well_top_left_x + if(replicate %in% c(2,4)) tile_stride else 0
    debug_print(replicate)
    debug_print(image_top_left_x)
    
    well_top_left_y <- (image_row - 1)*well_stride + 1
    debug_print(well_top_left_y)
    
    # ... and replicates 3 and 4 in the lower half.
    image_top_left_y <- well_top_left_y + if(replicate %in% c(3,4)) tile_stride else 0
    debug_print(image_top_left_y)
    
    # NOTE(review): the tile is written into a [y, x] block and the whole
    # matrix is transposed at the end; if readImage() returns arrays indexed
    # [x, y], every tile ends up transposed - confirm this is intended.
    full_image[image_top_left_y:(image_top_left_y+dim_image-1),
               image_top_left_x:(image_top_left_x + dim_image-1)] <- smaller
    
    # Track the darkest tile: the montage background is the smallest 5%
    # quantile seen in any tile.
    this_background <- quantile(smaller, 0.05)
    
    if(this_background < background){
      background <- this_background
    }
  }
  
  # Subtract the common background, scale by the requested quantile and invert.
  full_image2 <- full_image - background
  Image(1 - t(full_image2/quantile(full_image2, background_quantile)))
}



# Plate layout of the HEK293T experiment: rows A and B, columns 1-12, with a
# different plasmid every three columns.
plate_data <- data.frame(column = rep(1:12, times = 2),
                         row = rep(c('A', 'B'), each = 12)) %>%
  mutate(cell_type = 'HEK293T',
         plasmid = case_when(column <= 3 ~ 'untransfected',
                             column <= 6 ~ '12QN',
                             column <= 9 ~ 'WT',
                             column <= 12 ~ 'SNAP-only'))

# Orange-channel images of the HEK293T 12QN experiment.
red_12qn <- Sys.glob(
  paste0(data_dir, "/12qn repeat/hek293T/2024.05.08_12QN Orange 400ms HEK293T in PBS (Incucyte S5B)/orange/*")
)
#red_12qn <- Sys.glob("~/Downloads/orange/*")

# One row per image: well and image number come from the "_"-separated fields
# of the file name, the montage row (Order) from the plate row letter.
positions <- data.frame(filename = word(red_12qn, -1, sep="/")) %>%
  mutate(well = word(filename, 2, sep="_"),
         replicate = as.numeric(word(filename, 3, sep="_")),
         row = str_sub(well, 1, 1),
         column = as.numeric(str_sub(well, 2, 3))) %>%
  left_join(data.frame(row = LETTERS[1:8], Order = as.numeric(1:8))) %>%
  mutate(plate_column = column,
         Plate = 1) %>%
  # Keep only the wells that are described in plate_data.
  inner_join(plate_data) %>%
  #mutate(plate_column = plate_column - min(plate_column) + 1) %>%
  mutate(well = paste0(row, plate_column))
## Joining with `by = join_by(row)`
## Joining with `by = join_by(row, column)`
# Assemble the orange-channel montage on a 2-row x 12-column grid of wells.
im <- generate_full_image(
  positions,
  red_12qn,
  n_rows = 2,
  n_columns = 12,
  background_quantile = 0.999
)
## [1] "1"
## [1] 1
## [1] "yo"
## [1] 10
## [1] 3916
## [1] 1
## [1] 3916
## [1] 1
## [1] 1
## [1] "1"
## [1] 1
## [1] "yo"
## [1] 10
## [1] 3916
## [1] 3
## [1] 3916
## [1] 1
## [1] 206
## [1] "1"
## [1] 1
## [1] "yo"
## [1] 10
## [1] 3916
## [1] 2
## [1] 4121
## [1] 1
## [1] 1
## [1] "1"
## [1] 1
## [1] "yo"
## [1] 10
## [1] 3916
## [1] 4
## [1] 4121
## [1] 1
## [1] 206
## [1] "1"
## [1] 1
## [1] "yo"
## [1] 11
## [1] 4351
## [1] 1
## [1] 4351
## [1] 1
## [1] 1
## [1] "1"
## [1] 1
## [1] "yo"
## [1] 11
## [1] 4351
## [1] 3
## [1] 4351
## [1] 1
## [1] 206
## [1] "1"
## [1] 1
## [1] "yo"
## [1] 11
## [1] 4351
## [1] 2
## [1] 4556
## [1] 1
## [1] 1
## [1] "1"
## [1] 1
## [1] "yo"
## [1] 11
## [1] 4351
## [1] 4
## [1] 4556
## [1] 1
## [1] 206
## [1] "1"
## [1] 1
## [1] "yo"
## [1] 12
## [1] 4786
## [1] 1
## [1] 4786
## [1] 1
## [1] 1
## [1] "1"
## [1] 1
## [1] "yo"
## [1] 12
## [1] 4786
## [1] 3
## [1] 4786
## [1] 1
## [1] 206
## [1] "1"
## [1] 1
## [1] "yo"
## [1] 12
## [1] 4786
## [1] 2
## [1] 4991
## [1] 1
## [1] 1
## [1] "1"
## [1] 1
## [1] "yo"
## [1] 12
## [1] 4786
## [1] 4
## [1] 4991
## [1] 1
## [1] 206
## [1] "1"
## [1] 1
## [1] "yo"
## [1] 1
## [1] 1
## [1] 1
## [1] 1
## [1] 1
## [1] 1
## [1] "1"
## [1] 1
## [1] "yo"
## [1] 1
## [1] 1
## [1] 3
## [1] 1
## [1] 1
## [1] 206
## [1] "1"
## [1] 1
## [1] "yo"
## [1] 1
## [1] 1
## [1] 2
## [1] 206
## [1] 1
## [1] 1
## [1] "1"
## [1] 1
## [1] "yo"
## [1] 1
## [1] 1
## [1] 4
## [1] 206
## [1] 1
## [1] 206
## [1] "1"
## [1] 1
## [1] "yo"
## [1] 2
## [1] 436
## [1] 1
## [1] 436
## [1] 1
## [1] 1
## [1] "1"
## [1] 1
## [1] "yo"
## [1] 2
## [1] 436
## [1] 3
## [1] 436
## [1] 1
## [1] 206
## [1] "1"
## [1] 1
## [1] "yo"
## [1] 2
## [1] 436
## [1] 2
## [1] 641
## [1] 1
## [1] 1
## [1] "1"
## [1] 1
## [1] "yo"
## [1] 2
## [1] 436
## [1] 4
## [1] 641
## [1] 1
## [1] 206
## [1] "1"
## [1] 1
## [1] "yo"
## [1] 3
## [1] 871
## [1] 1
## [1] 871
## [1] 1
## [1] 1
## [1] "1"
## [1] 1
## [1] "yo"
## [1] 3
## [1] 871
## [1] 3
## [1] 871
## [1] 1
## [1] 206
## [1] "1"
## [1] 1
## [1] "yo"
## [1] 3
## [1] 871
## [1] 2
## [1] 1076
## [1] 1
## [1] 1
## [1] "1"
## [1] 1
## [1] "yo"
## [1] 3
## [1] 871
## [1] 4
## [1] 1076
## [1] 1
## [1] 206
## [1] "1"
## [1] 1
## [1] "yo"
## [1] 4
## [1] 1306
## [1] 1
## [1] 1306
## [1] 1
## [1] 1
## [1] "1"
## [1] 1
## [1] "yo"
## [1] 4
## [1] 1306
## [1] 3
## [1] 1306
## [1] 1
## [1] 206
## [1] "1"
## [1] 1
## [1] "yo"
## [1] 4
## [1] 1306
## [1] 2
## [1] 1511
## [1] 1
## [1] 1
## [1] "1"
## [1] 1
## [1] "yo"
## [1] 4
## [1] 1306
## [1] 4
## [1] 1511
## [1] 1
## [1] 206
## [1] "1"
## [1] 1
## [1] "yo"
## [1] 5
## [1] 1741
## [1] 1
## [1] 1741
## [1] 1
## [1] 1
## [1] "1"
## [1] 1
## [1] "yo"
## [1] 5
## [1] 1741
## [1] 3
## [1] 1741
## [1] 1
## [1] 206
## [1] "1"
## [1] 1
## [1] "yo"
## [1] 5
## [1] 1741
## [1] 2
## [1] 1946
## [1] 1
## [1] 1
## [1] "1"
## [1] 1
## [1] "yo"
## [1] 5
## [1] 1741
## [1] 4
## [1] 1946
## [1] 1
## [1] 206
## [1] "1"
## [1] 1
## [1] "yo"
## [1] 6
## [1] 2176
## [1] 1
## [1] 2176
## [1] 1
## [1] 1
## [1] "1"
## [1] 1
## [1] "yo"
## [1] 6
## [1] 2176
## [1] 3
## [1] 2176
## [1] 1
## [1] 206
## [1] "1"
## [1] 1
## [1] "yo"
## [1] 6
## [1] 2176
## [1] 2
## [1] 2381
## [1] 1
## [1] 1
## [1] "1"
## [1] 1
## [1] "yo"
## [1] 6
## [1] 2176
## [1] 4
## [1] 2381
## [1] 1
## [1] 206
## [1] "1"
## [1] 1
## [1] "yo"
## [1] 7
## [1] 2611
## [1] 1
## [1] 2611
## [1] 1
## [1] 1
## [1] "1"
## [1] 1
## [1] "yo"
## [1] 7
## [1] 2611
## [1] 3
## [1] 2611
## [1] 1
## [1] 206
## [1] "1"
## [1] 1
## [1] "yo"
## [1] 7
## [1] 2611
## [1] 2
## [1] 2816
## [1] 1
## [1] 1
## [1] "1"
## [1] 1
## [1] "yo"
## [1] 7
## [1] 2611
## [1] 4
## [1] 2816
## [1] 1
## [1] 206
## [1] "1"
## [1] 1
## [1] "yo"
## [1] 8
## [1] 3046
## [1] 1
## [1] 3046
## [1] 1
## [1] 1
## [1] "1"
## [1] 1
## [1] "yo"
## [1] 8
## [1] 3046
## [1] 3
## [1] 3046
## [1] 1
## [1] 206
## [1] "1"
## [1] 1
## [1] "yo"
## [1] 8
## [1] 3046
## [1] 2
## [1] 3251
## [1] 1
## [1] 1
## [1] "1"
## [1] 1
## [1] "yo"
## [1] 8
## [1] 3046
## [1] 4
## [1] 3251
## [1] 1
## [1] 206
## [1] "1"
## [1] 1
## [1] "yo"
## [1] 9
## [1] 3481
## [1] 1
## [1] 3481
## [1] 1
## [1] 1
## [1] "1"
## [1] 1
## [1] "yo"
## [1] 9
## [1] 3481
## [1] 3
## [1] 3481
## [1] 1
## [1] 206
## [1] "1"
## [1] 1
## [1] "yo"
## [1] 9
## [1] 3481
## [1] 2
## [1] 3686
## [1] 1
## [1] 1
## [1] "1"
## [1] 1
## [1] "yo"
## [1] 9
## [1] 3481
## [1] 4
## [1] 3686
## [1] 1
## [1] 206
## [1] "1"
## [1] 2
## [1] "yo"
## [1] 10
## [1] 3916
## [1] 1
## [1] 3916
## [1] 436
## [1] 436
## [1] "1"
## [1] 2
## [1] "yo"
## [1] 10
## [1] 3916
## [1] 3
## [1] 3916
## [1] 436
## [1] 641
## [1] "1"
## [1] 2
## [1] "yo"
## [1] 10
## [1] 3916
## [1] 2
## [1] 4121
## [1] 436
## [1] 436
## [1] "1"
## [1] 2
## [1] "yo"
## [1] 10
## [1] 3916
## [1] 4
## [1] 4121
## [1] 436
## [1] 641
## [1] "1"
## [1] 2
## [1] "yo"
## [1] 11
## [1] 4351
## [1] 1
## [1] 4351
## [1] 436
## [1] 436
## [1] "1"
## [1] 2
## [1] "yo"
## [1] 11
## [1] 4351
## [1] 3
## [1] 4351
## [1] 436
## [1] 641
## [1] "1"
## [1] 2
## [1] "yo"
## [1] 11
## [1] 4351
## [1] 2
## [1] 4556
## [1] 436
## [1] 436
## [1] "1"
## [1] 2
## [1] "yo"
## [1] 11
## [1] 4351
## [1] 4
## [1] 4556
## [1] 436
## [1] 641
## [1] "1"
## [1] 2
## [1] "yo"
## [1] 12
## [1] 4786
## [1] 1
## [1] 4786
## [1] 436
## [1] 436
## [1] "1"
## [1] 2
## [1] "yo"
## [1] 12
## [1] 4786
## [1] 3
## [1] 4786
## [1] 436
## [1] 641
## [1] "1"
## [1] 2
## [1] "yo"
## [1] 12
## [1] 4786
## [1] 2
## [1] 4991
## [1] 436
## [1] 436
## [1] "1"
## [1] 2
## [1] "yo"
## [1] 12
## [1] 4786
## [1] 4
## [1] 4991
## [1] 436
## [1] 641
## [1] "1"
## [1] 2
## [1] "yo"
## [1] 1
## [1] 1
## [1] 1
## [1] 1
## [1] 436
## [1] 436
## [1] "1"
## [1] 2
## [1] "yo"
## [1] 1
## [1] 1
## [1] 3
## [1] 1
## [1] 436
## [1] 641
## [1] "1"
## [1] 2
## [1] "yo"
## [1] 1
## [1] 1
## [1] 2
## [1] 206
## [1] 436
## [1] 436
## [1] "1"
## [1] 2
## [1] "yo"
## [1] 1
## [1] 1
## [1] 4
## [1] 206
## [1] 436
## [1] 641
## [1] "1"
## [1] 2
## [1] "yo"
## [1] 2
## [1] 436
## [1] 1
## [1] 436
## [1] 436
## [1] 436
## [1] "1"
## [1] 2
## [1] "yo"
## [1] 2
## [1] 436
## [1] 3
## [1] 436
## [1] 436
## [1] 641
## [1] "1"
## [1] 2
## [1] "yo"
## [1] 2
## [1] 436
## [1] 2
## [1] 641
## [1] 436
## [1] 436
## [1] "1"
## [1] 2
## [1] "yo"
## [1] 2
## [1] 436
## [1] 4
## [1] 641
## [1] 436
## [1] 641
## [1] "1"
## [1] 2
## [1] "yo"
## [1] 3
## [1] 871
## [1] 1
## [1] 871
## [1] 436
## [1] 436
## [1] "1"
## [1] 2
## [1] "yo"
## [1] 3
## [1] 871
## [1] 3
## [1] 871
## [1] 436
## [1] 641
## [1] "1"
## [1] 2
## [1] "yo"
## [1] 3
## [1] 871
## [1] 2
## [1] 1076
## [1] 436
## [1] 436
## [1] "1"
## [1] 2
## [1] "yo"
## [1] 3
## [1] 871
## [1] 4
## [1] 1076
## [1] 436
## [1] 641
## [1] "1"
## [1] 2
## [1] "yo"
## [1] 4
## [1] 1306
## [1] 1
## [1] 1306
## [1] 436
## [1] 436
## [1] "1"
## [1] 2
## [1] "yo"
## [1] 4
## [1] 1306
## [1] 3
## [1] 1306
## [1] 436
## [1] 641
## [1] "1"
## [1] 2
## [1] "yo"
## [1] 4
## [1] 1306
## [1] 2
## [1] 1511
## [1] 436
## [1] 436
## [1] "1"
## [1] 2
## [1] "yo"
## [1] 4
## [1] 1306
## [1] 4
## [1] 1511
## [1] 436
## [1] 641
## [1] "1"
## [1] 2
## [1] "yo"
## [1] 5
## [1] 1741
## [1] 1
## [1] 1741
## [1] 436
## [1] 436
## [1] "1"
## [1] 2
## [1] "yo"
## [1] 5
## [1] 1741
## [1] 3
## [1] 1741
## [1] 436
## [1] 641
## [1] "1"
## [1] 2
## [1] "yo"
## [1] 5
## [1] 1741
## [1] 2
## [1] 1946
## [1] 436
## [1] 436
## [1] "1"
## [1] 2
## [1] "yo"
## [1] 5
## [1] 1741
## [1] 4
## [1] 1946
## [1] 436
## [1] 641
## [1] "1"
## [1] 2
## [1] "yo"
## [1] 6
## [1] 2176
## [1] 1
## [1] 2176
## [1] 436
## [1] 436
## [1] "1"
## [1] 2
## [1] "yo"
## [1] 6
## [1] 2176
## [1] 3
## [1] 2176
## [1] 436
## [1] 641
## [1] "1"
## [1] 2
## [1] "yo"
## [1] 6
## [1] 2176
## [1] 2
## [1] 2381
## [1] 436
## [1] 436
## [1] "1"
## [1] 2
## [1] "yo"
## [1] 6
## [1] 2176
## [1] 4
## [1] 2381
## [1] 436
## [1] 641
## [1] "1"
## [1] 2
## [1] "yo"
## [1] 7
## [1] 2611
## [1] 1
## [1] 2611
## [1] 436
## [1] 436
## [1] "1"
## [1] 2
## [1] "yo"
## [1] 7
## [1] 2611
## [1] 3
## [1] 2611
## [1] 436
## [1] 641
## [1] "1"
## [1] 2
## [1] "yo"
## [1] 7
## [1] 2611
## [1] 2
## [1] 2816
## [1] 436
## [1] 436
## [1] "1"
## [1] 2
## [1] "yo"
## [1] 7
## [1] 2611
## [1] 4
## [1] 2816
## [1] 436
## [1] 641
## [1] "1"
## [1] 2
## [1] "yo"
## [1] 8
## [1] 3046
## [1] 1
## [1] 3046
## [1] 436
## [1] 436
## [1] "1"
## [1] 2
## [1] "yo"
## [1] 8
## [1] 3046
## [1] 3
## [1] 3046
## [1] 436
## [1] 641
## [1] "1"
## [1] 2
## [1] "yo"
## [1] 8
## [1] 3046
## [1] 2
## [1] 3251
## [1] 436
## [1] 436
## [1] "1"
## [1] 2
## [1] "yo"
## [1] 8
## [1] 3046
## [1] 4
## [1] 3251
## [1] 436
## [1] 641
## [1] "1"
## [1] 2
## [1] "yo"
## [1] 9
## [1] 3481
## [1] 1
## [1] 3481
## [1] 436
## [1] 436
## [1] "1"
## [1] 2
## [1] "yo"
## [1] 9
## [1] 3481
## [1] 3
## [1] 3481
## [1] 436
## [1] 641
## [1] "1"
## [1] 2
## [1] "yo"
## [1] 9
## [1] 3481
## [1] 2
## [1] 3686
## [1] 436
## [1] 436
## [1] "1"
## [1] 2
## [1] "yo"
## [1] 9
## [1] 3481
## [1] 4
## [1] 3686
## [1] 436
## [1] 641
# Build a red-only RGB image from `im` (presumably the stitched montage made
# earlier in the file -- TODO confirm). The matrix is inverted (1 - im) and
# halved before being placed in the red channel.
rgb_red <- rgbImage(red = 0.5 * (1 - im), green = NULL, blue = NULL)
display(rgb_red)

# Write the uncropped montage next to the other markdown figures.
writeImage(
  rgb_red,
  "markdown_images/12QN repeat/hek293T/uncropped_mScarlet.png",
  quality = 90
)




# Green-channel image files for the 12QN repeat plate.
green_12qn <- Sys.glob(paste0(
  data_dir,
  "/12qn repeat/hek293T/2024.05.08_12QN Orange 400ms HEK293T in PBS (Incucyte S5B)/green/*"
))

# One row per image file. The well and replicate are parsed from the
# underscore-separated file name, the plate row letter (A-H) is mapped to a
# numeric `Order` (1-8), and only wells present in `plate_data` are kept.
# Both joins deliberately rely on dplyr's natural-join column matching.
positions <- data.frame(filename = word(green_12qn, -1, sep = "/")) %>%
  mutate(
    well = word(filename, 2, sep = "_"),
    replicate = as.numeric(word(filename, 3, sep = "_")),
    row = str_sub(well, 1, 1),
    column = as.numeric(str_sub(well, 2, 3))
  ) %>%
  left_join(data.frame(row = LETTERS[1:8], Order = as.numeric(1:8))) %>%
  mutate(plate_column = column, Plate = 1) %>%
  inner_join(plate_data) %>%
  # mutate(plate_column = plate_column - min(plate_column) + 1) %>%
  # Rebuild the well label from row + column (drops any zero padding).
  mutate(well = paste0(row, plate_column))
## Joining with `by = join_by(row)`
## Joining with `by = join_by(row, column)`
# Stitch the green-channel images into one montage: 2 plate rows x 12 columns,
# using the 0.999 quantile for the background estimate.
im_green <- generate_full_image(
  combined_positions = positions,
  all_images = green_12qn,
  n_rows = 2,
  n_columns = 12,
  background_quantile = 0.999
)
## [1] "1"
## [1] 1
## [1] "yo"
## [1] 10
## [1] 3916
## [1] 1
## [1] 3916
## [1] 1
## [1] 1
## [1] "1"
## [1] 1
## [1] "yo"
## [1] 10
## [1] 3916
## [1] 3
## [1] 3916
## [1] 1
## [1] 206
## [1] "1"
## [1] 1
## [1] "yo"
## [1] 10
## [1] 3916
## [1] 2
## [1] 4121
## [1] 1
## [1] 1
## [1] "1"
## [1] 1
## [1] "yo"
## [1] 10
## [1] 3916
## [1] 4
## [1] 4121
## [1] 1
## [1] 206
## [1] "1"
## [1] 1
## [1] "yo"
## [1] 11
## [1] 4351
## [1] 1
## [1] 4351
## [1] 1
## [1] 1
## [1] "1"
## [1] 1
## [1] "yo"
## [1] 11
## [1] 4351
## [1] 3
## [1] 4351
## [1] 1
## [1] 206
## [1] "1"
## [1] 1
## [1] "yo"
## [1] 11
## [1] 4351
## [1] 2
## [1] 4556
## [1] 1
## [1] 1
## [1] "1"
## [1] 1
## [1] "yo"
## [1] 11
## [1] 4351
## [1] 4
## [1] 4556
## [1] 1
## [1] 206
## [1] "1"
## [1] 1
## [1] "yo"
## [1] 12
## [1] 4786
## [1] 1
## [1] 4786
## [1] 1
## [1] 1
## [1] "1"
## [1] 1
## [1] "yo"
## [1] 12
## [1] 4786
## [1] 3
## [1] 4786
## [1] 1
## [1] 206
## [1] "1"
## [1] 1
## [1] "yo"
## [1] 12
## [1] 4786
## [1] 2
## [1] 4991
## [1] 1
## [1] 1
## [1] "1"
## [1] 1
## [1] "yo"
## [1] 12
## [1] 4786
## [1] 4
## [1] 4991
## [1] 1
## [1] 206
## [1] "1"
## [1] 1
## [1] "yo"
## [1] 1
## [1] 1
## [1] 1
## [1] 1
## [1] 1
## [1] 1
## [1] "1"
## [1] 1
## [1] "yo"
## [1] 1
## [1] 1
## [1] 3
## [1] 1
## [1] 1
## [1] 206
## [1] "1"
## [1] 1
## [1] "yo"
## [1] 1
## [1] 1
## [1] 2
## [1] 206
## [1] 1
## [1] 1
## [1] "1"
## [1] 1
## [1] "yo"
## [1] 1
## [1] 1
## [1] 4
## [1] 206
## [1] 1
## [1] 206
## [1] "1"
## [1] 1
## [1] "yo"
## [1] 2
## [1] 436
## [1] 1
## [1] 436
## [1] 1
## [1] 1
## [1] "1"
## [1] 1
## [1] "yo"
## [1] 2
## [1] 436
## [1] 3
## [1] 436
## [1] 1
## [1] 206
## [1] "1"
## [1] 1
## [1] "yo"
## [1] 2
## [1] 436
## [1] 2
## [1] 641
## [1] 1
## [1] 1
## [1] "1"
## [1] 1
## [1] "yo"
## [1] 2
## [1] 436
## [1] 4
## [1] 641
## [1] 1
## [1] 206
## [1] "1"
## [1] 1
## [1] "yo"
## [1] 3
## [1] 871
## [1] 1
## [1] 871
## [1] 1
## [1] 1
## [1] "1"
## [1] 1
## [1] "yo"
## [1] 3
## [1] 871
## [1] 3
## [1] 871
## [1] 1
## [1] 206
## [1] "1"
## [1] 1
## [1] "yo"
## [1] 3
## [1] 871
## [1] 2
## [1] 1076
## [1] 1
## [1] 1
## [1] "1"
## [1] 1
## [1] "yo"
## [1] 3
## [1] 871
## [1] 4
## [1] 1076
## [1] 1
## [1] 206
## [1] "1"
## [1] 1
## [1] "yo"
## [1] 4
## [1] 1306
## [1] 1
## [1] 1306
## [1] 1
## [1] 1
## [1] "1"
## [1] 1
## [1] "yo"
## [1] 4
## [1] 1306
## [1] 3
## [1] 1306
## [1] 1
## [1] 206
## [1] "1"
## [1] 1
## [1] "yo"
## [1] 4
## [1] 1306
## [1] 2
## [1] 1511
## [1] 1
## [1] 1
## [1] "1"
## [1] 1
## [1] "yo"
## [1] 4
## [1] 1306
## [1] 4
## [1] 1511
## [1] 1
## [1] 206
## [1] "1"
## [1] 1
## [1] "yo"
## [1] 5
## [1] 1741
## [1] 1
## [1] 1741
## [1] 1
## [1] 1
## [1] "1"
## [1] 1
## [1] "yo"
## [1] 5
## [1] 1741
## [1] 3
## [1] 1741
## [1] 1
## [1] 206
## [1] "1"
## [1] 1
## [1] "yo"
## [1] 5
## [1] 1741
## [1] 2
## [1] 1946
## [1] 1
## [1] 1
## [1] "1"
## [1] 1
## [1] "yo"
## [1] 5
## [1] 1741
## [1] 4
## [1] 1946
## [1] 1
## [1] 206
## [1] "1"
## [1] 1
## [1] "yo"
## [1] 6
## [1] 2176
## [1] 1
## [1] 2176
## [1] 1
## [1] 1
## [1] "1"
## [1] 1
## [1] "yo"
## [1] 6
## [1] 2176
## [1] 3
## [1] 2176
## [1] 1
## [1] 206
## [1] "1"
## [1] 1
## [1] "yo"
## [1] 6
## [1] 2176
## [1] 2
## [1] 2381
## [1] 1
## [1] 1
## [1] "1"
## [1] 1
## [1] "yo"
## [1] 6
## [1] 2176
## [1] 4
## [1] 2381
## [1] 1
## [1] 206
## [1] "1"
## [1] 1
## [1] "yo"
## [1] 7
## [1] 2611
## [1] 1
## [1] 2611
## [1] 1
## [1] 1
## [1] "1"
## [1] 1
## [1] "yo"
## [1] 7
## [1] 2611
## [1] 3
## [1] 2611
## [1] 1
## [1] 206
## [1] "1"
## [1] 1
## [1] "yo"
## [1] 7
## [1] 2611
## [1] 2
## [1] 2816
## [1] 1
## [1] 1
## [1] "1"
## [1] 1
## [1] "yo"
## [1] 7
## [1] 2611
## [1] 4
## [1] 2816
## [1] 1
## [1] 206
## [1] "1"
## [1] 1
## [1] "yo"
## [1] 8
## [1] 3046
## [1] 1
## [1] 3046
## [1] 1
## [1] 1
## [1] "1"
## [1] 1
## [1] "yo"
## [1] 8
## [1] 3046
## [1] 3
## [1] 3046
## [1] 1
## [1] 206
## [1] "1"
## [1] 1
## [1] "yo"
## [1] 8
## [1] 3046
## [1] 2
## [1] 3251
## [1] 1
## [1] 1
## [1] "1"
## [1] 1
## [1] "yo"
## [1] 8
## [1] 3046
## [1] 4
## [1] 3251
## [1] 1
## [1] 206
## [1] "1"
## [1] 1
## [1] "yo"
## [1] 9
## [1] 3481
## [1] 1
## [1] 3481
## [1] 1
## [1] 1
## [1] "1"
## [1] 1
## [1] "yo"
## [1] 9
## [1] 3481
## [1] 3
## [1] 3481
## [1] 1
## [1] 206
## [1] "1"
## [1] 1
## [1] "yo"
## [1] 9
## [1] 3481
## [1] 2
## [1] 3686
## [1] 1
## [1] 1
## [1] "1"
## [1] 1
## [1] "yo"
## [1] 9
## [1] 3481
## [1] 4
## [1] 3686
## [1] 1
## [1] 206
## [1] "1"
## [1] 2
## [1] "yo"
## [1] 10
## [1] 3916
## [1] 1
## [1] 3916
## [1] 436
## [1] 436
## [1] "1"
## [1] 2
## [1] "yo"
## [1] 10
## [1] 3916
## [1] 3
## [1] 3916
## [1] 436
## [1] 641
## [1] "1"
## [1] 2
## [1] "yo"
## [1] 10
## [1] 3916
## [1] 2
## [1] 4121
## [1] 436
## [1] 436
## [1] "1"
## [1] 2
## [1] "yo"
## [1] 10
## [1] 3916
## [1] 4
## [1] 4121
## [1] 436
## [1] 641
## [1] "1"
## [1] 2
## [1] "yo"
## [1] 11
## [1] 4351
## [1] 1
## [1] 4351
## [1] 436
## [1] 436
## [1] "1"
## [1] 2
## [1] "yo"
## [1] 11
## [1] 4351
## [1] 3
## [1] 4351
## [1] 436
## [1] 641
## [1] "1"
## [1] 2
## [1] "yo"
## [1] 11
## [1] 4351
## [1] 2
## [1] 4556
## [1] 436
## [1] 436
## [1] "1"
## [1] 2
## [1] "yo"
## [1] 11
## [1] 4351
## [1] 4
## [1] 4556
## [1] 436
## [1] 641
## [1] "1"
## [1] 2
## [1] "yo"
## [1] 12
## [1] 4786
## [1] 1
## [1] 4786
## [1] 436
## [1] 436
## [1] "1"
## [1] 2
## [1] "yo"
## [1] 12
## [1] 4786
## [1] 3
## [1] 4786
## [1] 436
## [1] 641
## [1] "1"
## [1] 2
## [1] "yo"
## [1] 12
## [1] 4786
## [1] 2
## [1] 4991
## [1] 436
## [1] 436
## [1] "1"
## [1] 2
## [1] "yo"
## [1] 12
## [1] 4786
## [1] 4
## [1] 4991
## [1] 436
## [1] 641
## [1] "1"
## [1] 2
## [1] "yo"
## [1] 1
## [1] 1
## [1] 1
## [1] 1
## [1] 436
## [1] 436
## [1] "1"
## [1] 2
## [1] "yo"
## [1] 1
## [1] 1
## [1] 3
## [1] 1
## [1] 436
## [1] 641
## [1] "1"
## [1] 2
## [1] "yo"
## [1] 1
## [1] 1
## [1] 2
## [1] 206
## [1] 436
## [1] 436
## [1] "1"
## [1] 2
## [1] "yo"
## [1] 1
## [1] 1
## [1] 4
## [1] 206
## [1] 436
## [1] 641
## [1] "1"
## [1] 2
## [1] "yo"
## [1] 2
## [1] 436
## [1] 1
## [1] 436
## [1] 436
## [1] 436
## [1] "1"
## [1] 2
## [1] "yo"
## [1] 2
## [1] 436
## [1] 3
## [1] 436
## [1] 436
## [1] 641
## [1] "1"
## [1] 2
## [1] "yo"
## [1] 2
## [1] 436
## [1] 2
## [1] 641
## [1] 436
## [1] 436
## [1] "1"
## [1] 2
## [1] "yo"
## [1] 2
## [1] 436
## [1] 4
## [1] 641
## [1] 436
## [1] 641
## [1] "1"
## [1] 2
## [1] "yo"
## [1] 3
## [1] 871
## [1] 1
## [1] 871
## [1] 436
## [1] 436
## [1] "1"
## [1] 2
## [1] "yo"
## [1] 3
## [1] 871
## [1] 3
## [1] 871
## [1] 436
## [1] 641
## [1] "1"
## [1] 2
## [1] "yo"
## [1] 3
## [1] 871
## [1] 2
## [1] 1076
## [1] 436
## [1] 436
## [1] "1"
## [1] 2
## [1] "yo"
## [1] 3
## [1] 871
## [1] 4
## [1] 1076
## [1] 436
## [1] 641
## [1] "1"
## [1] 2
## [1] "yo"
## [1] 4
## [1] 1306
## [1] 1
## [1] 1306
## [1] 436
## [1] 436
## [1] "1"
## [1] 2
## [1] "yo"
## [1] 4
## [1] 1306
## [1] 3
## [1] 1306
## [1] 436
## [1] 641
## [1] "1"
## [1] 2
## [1] "yo"
## [1] 4
## [1] 1306
## [1] 2
## [1] 1511
## [1] 436
## [1] 436
## [1] "1"
## [1] 2
## [1] "yo"
## [1] 4
## [1] 1306
## [1] 4
## [1] 1511
## [1] 436
## [1] 641
## [1] "1"
## [1] 2
## [1] "yo"
## [1] 5
## [1] 1741
## [1] 1
## [1] 1741
## [1] 436
## [1] 436
## [1] "1"
## [1] 2
## [1] "yo"
## [1] 5
## [1] 1741
## [1] 3
## [1] 1741
## [1] 436
## [1] 641
## [1] "1"
## [1] 2
## [1] "yo"
## [1] 5
## [1] 1741
## [1] 2
## [1] 1946
## [1] 436
## [1] 436
## [1] "1"
## [1] 2
## [1] "yo"
## [1] 5
## [1] 1741
## [1] 4
## [1] 1946
## [1] 436
## [1] 641
## [1] "1"
## [1] 2
## [1] "yo"
## [1] 6
## [1] 2176
## [1] 1
## [1] 2176
## [1] 436
## [1] 436
## [1] "1"
## [1] 2
## [1] "yo"
## [1] 6
## [1] 2176
## [1] 3
## [1] 2176
## [1] 436
## [1] 641
## [1] "1"
## [1] 2
## [1] "yo"
## [1] 6
## [1] 2176
## [1] 2
## [1] 2381
## [1] 436
## [1] 436
## [1] "1"
## [1] 2
## [1] "yo"
## [1] 6
## [1] 2176
## [1] 4
## [1] 2381
## [1] 436
## [1] 641
## [1] "1"
## [1] 2
## [1] "yo"
## [1] 7
## [1] 2611
## [1] 1
## [1] 2611
## [1] 436
## [1] 436
## [1] "1"
## [1] 2
## [1] "yo"
## [1] 7
## [1] 2611
## [1] 3
## [1] 2611
## [1] 436
## [1] 641
## [1] "1"
## [1] 2
## [1] "yo"
## [1] 7
## [1] 2611
## [1] 2
## [1] 2816
## [1] 436
## [1] 436
## [1] "1"
## [1] 2
## [1] "yo"
## [1] 7
## [1] 2611
## [1] 4
## [1] 2816
## [1] 436
## [1] 641
## [1] "1"
## [1] 2
## [1] "yo"
## [1] 8
## [1] 3046
## [1] 1
## [1] 3046
## [1] 436
## [1] 436
## [1] "1"
## [1] 2
## [1] "yo"
## [1] 8
## [1] 3046
## [1] 3
## [1] 3046
## [1] 436
## [1] 641
## [1] "1"
## [1] 2
## [1] "yo"
## [1] 8
## [1] 3046
## [1] 2
## [1] 3251
## [1] 436
## [1] 436
## [1] "1"
## [1] 2
## [1] "yo"
## [1] 8
## [1] 3046
## [1] 4
## [1] 3251
## [1] 436
## [1] 641
## [1] "1"
## [1] 2
## [1] "yo"
## [1] 9
## [1] 3481
## [1] 1
## [1] 3481
## [1] 436
## [1] 436
## [1] "1"
## [1] 2
## [1] "yo"
## [1] 9
## [1] 3481
## [1] 3
## [1] 3481
## [1] 436
## [1] 641
## [1] "1"
## [1] 2
## [1] "yo"
## [1] 9
## [1] 3481
## [1] 2
## [1] 3686
## [1] 436
## [1] 436
## [1] "1"
## [1] 2
## [1] "yo"
## [1] 9
## [1] 3481
## [1] 4
## [1] 3686
## [1] 436
## [1] 641
# Place the inverted green montage (1 - im_green) in the green channel only.
rgb_green <- rgbImage(red = NULL, green = 1 - im_green, blue = NULL)
display(rgb_green)

# Write the uncropped montage next to the other markdown figures.
writeImage(
  rgb_green,
  "markdown_images/12QN repeat/hek293T/uncropped_green.png",
  quality = 90
)

shRNA experiment

image_files <- Sys.glob(paste0(data_dir, "/sami/4x/*.png"))

# Layout constants (pixels): gap between images of one well, gap between
# wells, and the nominal per-image step used when computing offsets.
mini_spacing <- 5
big_spacing <- 15
dim_image <- 200

# One row per image. Wells in plate columns 1-3 occupy montage rows 1-8 and
# wells in columns 4-6 are stacked underneath as rows 9-16; within each well
# the numbered images are laid out on a 2 x 2 grid (small_row / small_column).
image_df <- data.frame(filename = word(image_files, -1, sep = "/"),
                       full_dir = image_files) %>%
  mutate(
    well = word(filename, 2, sep = "_"),
    row = str_sub(well, 1, 1),
    # A-H -> 1-8 (NA for any other letter)
    row_number = as.numeric(match(row, LETTERS[1:8])),
    column = as.numeric(str_sub(well, 2, -1)),
    image_number = as.numeric(word(filename, 3, sep = "_"))
  ) %>%
  filter(column <= 6) %>%  # just b5
  mutate(
    big_row = ifelse(column <= 3, row_number, row_number + 8),
    small_row = ifelse(image_number > 2, 2, 1),
    big_column = ifelse(column <= 3, column, column - 3),
    small_column = ifelse(image_number > 2, image_number - 2, image_number)
  ) %>%
  mutate(
    n_mini_spaces_x = big_column - 1 + small_column - 1,
    n_big_spaces_x = big_column - 1,
    n_mini_spaces_y = big_row - 1 + small_row - 1,
    n_big_spaces_y = big_row - 1,
    n_previous_images_x = 2 * (big_column - 1) + small_column - 1,
    n_previous_images_y = 2 * (big_row - 1) + small_row - 1
  ) %>%
  # Top-left pixel of each image on the canvas (1-based).
  mutate(
    start_x = 1 + n_big_spaces_x * big_spacing +
      n_mini_spaces_x * mini_spacing + n_previous_images_x * dim_image,
    start_y = 1 + n_big_spaces_y * big_spacing +
      n_mini_spaces_y * mini_spacing + n_previous_images_y * dim_image
  ) %>%
  # Treatment per well: look up the label by plate row (A-H) within each
  # block of three plate columns.
  mutate(shRNA = case_when(
    column %in% 1:3 ~ c("FUS shRNA1", "FUS shRNA2", "FUS shRNA3",
                        "hnRNPA1 shRNA1", "hnRNPA1 shRNA2", "hnRNPA1 shRNA3",
                        "hnRNPC shRNA1", "hnRNPC shRNA2")[row_number],
    column %in% 4:6 ~ c("hnRNPC shRNA3", "hnRNPK shRNA1", "hnRNPK shRNA2",
                        "hnRNPK shRNA3", "TDP-43 shRNA1", "TDP-43 shRNA2",
                        "TDP-43 shRNA3", "control")[row_number]
  ))


# Find common normalisation factor
# Draw 100 random pixel values from every image and pool them; the
# normalisation factor is the maximum of the pooled sample (quantile 1).
# The sample is random, so results vary between runs unless a seed is set
# upstream.
all_vals <- unlist(lapply(image_df$full_dir, function(path) {
  sample(as.vector(readImage(path)), 100)
}))

normalisation <- quantile(all_vals, 1)

# Find background
# Same sampling, restricted to the images from plate row A; the background is
# the minimum of that sample (quantile 0).
background_df <- image_df %>% filter(row == "A")
all_vals2 <- unlist(lapply(background_df$full_dir, function(path) {
  sample(as.vector(readImage(path)), 100)
}))

# Fix: this previously read `all_vals` (the all-image sample), which left the
# row-A sample `all_vals2` computed but never used.
background <- quantile(all_vals2, 0.0)

# Canvas size: furthest tile origin plus one tile (each tile is pasted at
# dim_image * 2 pixels square).
height <- max(image_df$start_y) + dim_image * 2
width <- max(image_df$start_x) + dim_image * 2

full_image <- matrix(nrow = height, ncol = width, 0)

# Paste every image onto the canvas at its precomputed origin.
# NOTE(review): start_x / start_y advance by dim_image per preceding image,
# but each tile pasted here is dim_image * 2 wide, so neighbouring tiles
# overlap and later ones overwrite earlier ones -- confirm this is intended.
for (i in seq_len(nrow(image_df))) {
  this_image <- readImage(image_df$full_dir[i])
  smaller <- as.array(EBImage::resize(this_image, w = dim_image * 2, h = dim_image * 2))

  start_x <- image_df$start_x[i]
  start_y <- image_df$start_y[i]

  full_image[start_y:(start_y + dim_image * 2 - 1),
             start_x:(start_x + dim_image * 2 - 1)] <- smaller
}

# Subtract the background, scale by the common normalisation factor, and
# transpose before writing.
rgbimg <- rgbImage(t((full_image - background) / normalisation))
writeImage(rgbimg, "markdown_images/sami/4x_red.png", quality = 90)

# Mean pixel intensity of each raw (unresized, un-normalised) image.
# vapply replaces the `1:nrow()` index loop: it is safe for an empty table and
# guarantees exactly one numeric value per file.
image_df$mean_intensity <- vapply(
  image_df$full_dir,
  function(path) mean(readImage(path)),
  numeric(1),
  USE.NAMES = FALSE
)

# Per-image mean intensity by shRNA; points whose label contains "TDP" get the
# second palette colour. Legend removed, y axis anchored at zero.
ggplot(
  image_df,
  aes(x = shRNA, y = mean_intensity, colour = str_detect(shRNA, "TDP"))
) +
  geom_point(alpha = 0.7) +
  ggsci::scale_colour_npg() +
  ylab("Mean intensity/(arbitrary units)") +
  ylim(0, NA) +
  theme_classic() +
  ggeasy::easy_rotate_x_labels(side = "right") +
  ggeasy::easy_remove_legend()

# Save the plot just drawn (ggsave defaults to the last plot) in both formats.
ggsave("markdown_images/sami/sami_quants.png", height = 10, width = 12, units = "cm")
ggsave("markdown_images/sami/sami_quants.pdf", height = 10, width = 12, units = "cm")

RT-PCR from i3 neurons

# Parse the raw trace export and coerce `value2` to numeric; entries that are
# not numbers become NA (hence the coercion warning in the output below).
df <- paste0(data_dir, "/i3 neurons/puja cDNA/C220830A16_2024-05-17_2203_20240517_053603_Rw.csv") %>%
  parse_qiaxcel_output() %>%
  mutate(value2 = as.numeric(value2))
## Rows: 10905 Columns: 18
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (18): Time, RFU(A01), RFU(A02), RFU(A03), RFU(A04), RFU(A05), RFU(A06), ...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Warning: There was 1 warning in `mutate()`.
## ℹ In argument: `value2 = as.numeric(value2)`.
## Caused by warning:
## ! NAs introduced by coercion
df2 <- preprocess_dataframe(df)

# Sample sheet: one row per (Row, sample) lane. Keeps single-character Row
# labels and samples below 8, names each sample, and builds the `unique_id`
# key used to join back onto the traces.
id_df <- df %>%
  distinct(Row, sample) %>%
  filter(str_length(Row) == 1, sample < 8) %>%
  mutate(
    sample_name = case_when(
      sample == 1 ~ "WT -protac",
      sample == 2 ~ "Halo +protac",
      sample == 3 ~ "B5 +protac",
      sample == 4 ~ "B11 +protac",
      sample == 5 ~ "mScarlet +protac",
      sample == 6 ~ "WT +protac",
      sample == 7 ~ "Halo -protac"
    ),
    unique_id = paste0(Row, sample)
  )

# Gel-like view of the traces: one column of tiles per sample, shaded from
# white to black by corrected signal, one facet per `row`. The y limits keep
# only the 0.55-0.8 index window (rows outside it are dropped with a warning).
ggplot(
  df2 %>%
    inner_join(id_df) %>%
    # filter(row == 'A') %>%
    filter(sample_name %in% c("Halo -protac", "Halo +protac", "mScarlet +protac",
                              "B5 +protac", "B11 +protac")),
  aes(
    x = factor(sample_name, levels = c("Halo -protac", "Halo +protac", "mScarlet +protac",
                                       "B5 +protac", "B11 +protac")),
    y = index_for_plotting,
    fill = corrected_value
  )
) +
  geom_tile() +
  scale_fill_gradient(low = "white", high = "black") +
  ylim(0.55, 0.8) +
  facet_wrap(~row) +
  theme_classic()
## Joining with `by = join_by(unique_id)`
## Warning: Removed 48763 rows containing missing values (`geom_tile()`).

# Save the tile plot drawn above (ggsave defaults to the last plot).
ggsave(
  "markdown_images/i3 neurons/RT-PCR unc13a - all three replicates plot.pdf",
  height = 20,
  width = 20,
  units = "cm"
)
## Warning: Removed 48763 rows containing missing values (`geom_tile()`).
# Molar fraction of the lower band per lane.
# Points are assigned to a band by their corrected index (0.6 +/- 0.02 ->
# lower, 0.72 +/- 0.035 -> upper), the signal is integrated per lane and band,
# and the area is divided by product length (405, or 405 + 128 for the upper
# band) to convert it to a molar quantity.
# The result stays grouped by `sample_name`, as in the original pipeline.
molar_ratios <- df2 %>%
  mutate(
    band = case_when(
      abs(corrected_index - 0.6) < 0.02 ~ "lower_band",
      abs(corrected_index - 0.72) < 0.035 ~ "upper_band",
      TRUE ~ "ignore"
    ),
    product_length = case_when(
      band == "upper_band" ~ 405 + 128,
      band == "lower_band" ~ 405,
      TRUE ~ 0
    )
  ) %>%
  group_by(unique_id, band) %>%
  mutate(integrated_area = sum(corrected_value)) %>%
  distinct(unique_id, band, integrated_area, product_length) %>%
  mutate(molar_value = integrated_area / product_length) %>%
  filter(band != "ignore") %>%
  ungroup() %>%
  dplyr::select(-product_length, -integrated_area) %>%
  pivot_wider(names_from = band, values_from = molar_value) %>%
  mutate(molar_fraction_lower_band = lower_band / (lower_band + upper_band)) %>%
  inner_join(id_df) %>%
  group_by(sample_name) %>%
  mutate(mean = mean(molar_fraction_lower_band),
         sd = sd(molar_fraction_lower_band))
## Joining with `by = join_by(unique_id)`
# Quick look: lower-band molar fraction for every sample.
ggplot(molar_ratios, aes(x = sample_name, y = molar_fraction_lower_band)) +
  geom_point()

# Figure: upper-band percentage (100 - 100 * lower fraction), WT samples
# excluded, samples in a fixed order.
ggplot(
  molar_ratios %>% filter(!str_detect(sample_name, "WT")),
  aes(
    x = factor(sample_name, levels = c("Halo -protac", "Halo +protac", "mScarlet +protac",
                                       "B5 +protac", "B11 +protac")),
    y = 100 - 100 * molar_fraction_lower_band
  )
) +
  geom_dotplot(binaxis = "y", stackdir = "center") +
  ylab("UNC13A CE PSI") +
  theme_classic()
## Bin width defaults to 1/30 of the range of the data. Pick better value with
## `binwidth`.

# Save the PSI dot plot drawn above (ggsave defaults to the last plot).
ggsave(
  "markdown_images/i3 neurons/RT-PCR unc13a.pdf",
  height = 4.5,
  width = 4,
  units = "cm"
)
## Bin width defaults to 1/30 of the range of the data. Pick better value with
## `binwidth`.

TDP-RAVER virus in spinal cord

files <- Sys.glob(paste0(data_dir, "/raver_spinal_cord/*.bam"))

# Read every BAM into a data frame tagged with its file path, then stack them
# in one step. This replaces growing `full_df` with bind_rows() inside a loop,
# which copied the table on each iteration and silently left a stale `full_df`
# in place when no files matched.
full_df <- bind_rows(lapply(files, function(file) {
  data.frame(scanBam(file)) %>%
    mutate(name = file)
}))

# Count reads per file and splice class.
# Keeps files whose name contains "barcode0" but not "barcode09", reads mapped
# to reference 'b11' with flag 0 or 16, and classifies each read by the
# skipped-region lengths in its CIGAR string (121N and 145N -> CE,
# 357N -> no_CE, anything else -> unclear).
full_df2 <- full_df %>%
  filter(str_detect(name, "barcode0")) %>%
  filter(!str_detect(name, "barcode09")) %>%
  filter(rname == "b11") %>%
  filter(flag %in% c(0, 16)) %>%
  mutate(ce = case_when(
    str_detect(cigar, "121N") & str_detect(cigar, "145N") ~ "CE",
    str_detect(cigar, "357N") ~ "no_CE",
    TRUE ~ "unclear"
  )) %>%
  group_by(name, ce) %>%
  mutate(n = n()) %>%
  distinct(n) %>%
  # Single digit following "barcode0" in the file name.
  mutate(barcode = as.numeric(str_sub(word(name, 2, sep = "barcode"), 2, 2)))

# Percentage of CE reads per barcode. Odd barcodes are labelled cKO, even
# barcodes WT; `m` is the per-condition mean.
# The CE count is summed directly, so a barcode with no CE reads gives 0 %
# rather than the negative value produced by the old max(ifelse(..., -1))
# sentinel. The result is unchanged whenever a barcode has one CE row.
full_df3 <- full_df2 %>%
  ungroup() %>%
  group_by(barcode) %>%
  mutate(pc_ce = 100 * sum(n[ce == "CE"]) / sum(n)) %>%
  distinct(pc_ce, barcode) %>%
  mutate(condition = factor(ifelse(barcode %% 2 == 1, "cKO", "WT"),
                            levels = c("WT", "cKO"))) %>%
  ungroup() %>%
  group_by(condition) %>%
  mutate(m = mean(pc_ce))

# Dot plot of CE percentage per barcode, split by condition.
# The title is deliberately blank: the original set a descriptive title and
# then immediately overrode it with "", so only the blank title is kept.
ggplot(full_df3, aes(x = condition, y = pc_ce, fill = condition)) +
  geom_dotplot(
    binaxis = "y",
    stackdir = "center",
    binwidth = 0.2,
    dotsize = 5,
    alpha = 0.8
  ) +
  ggsci::scale_fill_npg() +
  labs(title = "", y = "Cryptic #9 PSI \n(mouse spinal cord)") +
  theme_classic() +
  ggeasy::easy_remove_legend()

ggsave(
  "markdown_images/raver_spinal_cord/nanopore_results.pdf",
  height = 5,
  width = 4,
  units = "cm"
)


# Now using extraction of junctions...
# Each .csv.gz lists junction strings with read counts. Reads are classed by
# their exact junction string (730-1088 -> no_CE, 730-852;942-1088 -> with_CE,
# anything else -> other) and summarised per barcode.

files <- Sys.glob(paste0(data_dir, "/raver_spinal_cord/*.csv.gz"))

all_df <- map_df(files, function(file) {
  read_csv(file) %>% mutate(filename = word(file, -1, sep = "/"))
}) %>%
  # Barcode is the second character of the file name; odd -> cKO, even -> WT.
  mutate(barcode = as.numeric(str_sub(filename, 2, 2))) %>%
  filter(!str_detect(flag_string, "alignment")) %>%
  mutate(
    condition = factor(ifelse(barcode %% 2 == 1, "cKO", "WT"),
                       levels = c("WT", "cKO")),
    isoform = case_when(
      junctions == "730-1088" ~ "no_CE",
      junctions == "730-852;942-1088" ~ "with_CE",
      TRUE ~ "other"
    )
  ) %>%
  group_by(barcode, isoform) %>%
  mutate(n_this_isoform = sum(number_of_reads)) %>%
  ungroup() %>%
  group_by(barcode) %>%
  mutate(frac_this_isoform = n_this_isoform / sum(number_of_reads)) %>%
  distinct(barcode, condition, n_this_isoform, frac_this_isoform, isoform) %>%
  # The with_CE count is summed directly, so a barcode with no with_CE reads
  # gives 0 instead of the negative fraction the old max(ifelse(..., -1))
  # sentinel produced. Identical whenever a with_CE row exists.
  mutate(frac_ce = sum(n_this_isoform[isoform == "with_CE"]) / sum(n_this_isoform))
## Rows: 3461 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): reference, flag_string, strand, junctions
## dbl (5): mapping_quality, flag, first_pos, last_pos, number_of_reads
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 3419 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): reference, flag_string, strand, junctions
## dbl (5): mapping_quality, flag, first_pos, last_pos, number_of_reads
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 2629 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): reference, flag_string, strand, junctions
## dbl (5): mapping_quality, flag, first_pos, last_pos, number_of_reads
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 3117 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): reference, flag_string, strand, junctions
## dbl (5): mapping_quality, flag, first_pos, last_pos, number_of_reads
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 2874 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): reference, flag_string, strand, junctions
## dbl (5): mapping_quality, flag, first_pos, last_pos, number_of_reads
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 3798 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): reference, flag_string, strand, junctions
## dbl (5): mapping_quality, flag, first_pos, last_pos, number_of_reads
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 2251 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): reference, flag_string, strand, junctions
## dbl (5): mapping_quality, flag, first_pos, last_pos, number_of_reads
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 3444 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): reference, flag_string, strand, junctions
## dbl (5): mapping_quality, flag, first_pos, last_pos, number_of_reads
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Dot plot of the with_CE fraction (as a percentage) per barcode, by condition.
# The title is deliberately blank: the original set a descriptive title and
# then immediately overrode it with "", so only the blank title is kept.
ggplot(
  all_df %>% distinct(barcode, condition, frac_ce),
  aes(x = condition, y = 100 * frac_ce, fill = condition)
) +
  geom_dotplot(
    binaxis = "y",
    stackdir = "center",
    binwidth = 0.2,
    dotsize = 5,
    alpha = 0.8
  ) +
  ggsci::scale_fill_npg() +
  labs(title = "", y = "Cryptic #9 PSI \n(mouse spinal cord)") +
  theme_classic() +
  ggeasy::easy_remove_legend()

ggsave(
  "markdown_images/raver_spinal_cord/nanopore_results2.pdf",
  height = 5,
  width = 4,
  units = "cm"
)

Quantify STMN2 western blots

# Load the band quantifications and drop the 'background' lanes.
# background was zero in all cases due to quantification method used
df <- paste0(data_dir, "/STMN2 western blot quantification/SKNBE2 western blot quantifications with STMN2 - Sheet1 (1).csv") %>%
  read_csv() %>%
  filter(lane != "background")
## Rows: 83 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (3): lane, target, blot
## dbl (1): value
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# STMN2 remaining after knockdown, per construct and blot.
# Lanes are paired per construct (1-2 Constitutive, 3-4 #6, 5-6 #9, 7-8 mSc);
# odd lanes are labelled NT and the rest shTDP. STMN is normalised to tubulin
# within each lane, then expressed as shTDP / NT. Blots whose name contains
# 'box2' are pooled as one polyclonal replicate ('first') and averaged.
# Step order is unchanged from the original pipeline.
df2 <- df %>%
  mutate(line = case_when(lane %in% 1:2 ~ "Constitutive",
                          lane %in% 3:4 ~ "#6",
                          lane %in% 5:6 ~ "#9",
                          lane %in% 7:8 ~ "mSc")) %>%
  mutate(line = factor(line, levels = c("Constitutive", "#6", "#9", "mSc"))) %>%
  mutate(condition = case_when(lane %in% c(1, 3, 5, 7) ~ "NT",
                               TRUE ~ "shTDP")) %>%
  group_by(blot, target, line) %>%
  dplyr::select(-lane) %>%
  pivot_wider(names_from = target, values_from = value) %>%
  mutate(normalised_STMN = STMN / tubulin) %>%
  dplyr::select(-STMN, -tubulin) %>%
  pivot_wider(names_from = condition, values_from = normalised_STMN) %>%
  mutate(fraction_of_NT = shTDP / NT) %>%
  mutate(polyclonal_replicate = case_when(str_detect(blot, "box2") ~ "first",
                                          TRUE ~ blot)) %>%
  ungroup() %>%
  group_by(polyclonal_replicate, line) %>%
  mutate(mean_fraction_of_NT = mean(fraction_of_NT))

# One row per (polyclonal replicate, construct) for plotting.
df3 <- df2 %>% ungroup() %>% distinct(polyclonal_replicate, line, mean_fraction_of_NT)

# Percentage of STMN2 remaining per construct, one point per polyclonal
# replicate. The x label is 'Construct': the original set 'Vector' first and
# then overrode it, so only the effective label is kept.
ggplot(df3, aes(x = line, y = 100 * mean_fraction_of_NT)) +
  geom_point(alpha = 0.5) +
  # ggtitle('Quantification of STMN2 from western blots') +
  labs(x = "Construct", y = "% STMN2 remaining") +
  theme_classic()

ggsave(
  "markdown_images/STMN2 western blots/quantification.pdf",
  height = 6.5,
  width = 7,
  units = "cm"
)

Generate table with all vector sequences

# Backbone sequences, one row per FASTA record: `seq_name` holds the record
# name and `seq` the sequence.
backbones <- data.frame(seq = Biostrings::readDNAStringSet('small_data_files/vector_sequences/backbones.fa')) %>%
  rownames_to_column('seq_name')

# TDP-REGv1 mCherry (pTwist)

# First record of the reporter FASTA, as a plain character string.
r3 <- paste(readDNAStringSet('small_data_files/vector_sequences/mCherry/for-patent-aars1-rsp24-in-ptwist-cmv (1).fasta')[1])

# Positive control: the reporter with two fixed sub-sequences deleted (first
# occurrence of each replaced by '').
# NOTE(review): str_replace() leaves the string unchanged if a pattern is not
# found, so a sequence mismatch would go unnoticed here -- consider asserting
# that r3_pos is shorter than r3.
r3_pos <- str_replace(str_replace(r3, 'GTAAGAATGCACATCACTTCTTGAGAGTATGGAGGAGTGAAATGACACTCAGTGCCAGAGTTACTGTATATCTACACTTTAAAAGTGTAGCTTTTAAAAGATAAGCAAGCACAATCTTTTGTGTGTGTGTGTGTGAATGTGTGTGTGTGTGTGTGTCACCCAG', ''),
                      'GTATGCATCACCCCCCCAGCTAATTTTTTTTTGTATTTTTTACCGAGTCGGGGTTTCGCAATGTTGCCCAGGCTGGTCTCAGAGTCTCGCTCTGTTGTCTACGCTGGAGTGCAGTAACATGAGCCACTGTGCCCGGCCAATCCTAAGAATTTCTTTTGCGGTGGTTGCAAGTCTGGGCAGAACTCTTGTCAGGGGCTGTAACTGGACTTATCTTTACTCCTTTGTCAG', '')

# Two-row table: the reporter and its positive control.
mcherry_df <- data.frame(vector_name = c('TDP-REGv1 mCherry reporter', 'TDP-REGv1 mCherry reporter positive control'),
                         sequence = c(r3, r3_pos))

# Start the combined vector table; later sections append to it.
all_vectors <- mcherry_df

# TDP-REGv2 mScarlet vectors (pTwist)

# Plate layout table with a running row index. Note that this reassigns
# `positions`, replacing any earlier table of the same name.
# seq_len(n()) instead of 1:n() so an empty file yields zero rows rather than
# an error.
positions <- read_csv("small_data_files/Plate positions for 12_05_2022 incucyte.csv") %>%
  mutate(row = seq_len(n()))
## Rows: 20 Columns: 5
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (3): Construct, Position, Type
## dbl (2): Plate, Order
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Insert sequences, one row per FASTA record (`Construct` = record name).
mscar_fa <- data.frame(insert = Biostrings::readDNAStringSet('small_data_files/vector_sequences/TDP-REGv2 mScarlet vectors.fa')) %>%
  rownames_to_column("Construct")

# Full vector sequences: upstream backbone + insert + downstream backbone.
# Constructs listed in `positions` are numbered by their `Order`. The
# 'positive_control' insert is appended as an extra row with no `Order`, which
# is what marks it as the positive control when names are assigned below.
# (The original also set a provisional vector_name on that row, but it was
# always overwritten by the case_when, so it is omitted.)
mscar_df <- mscar_fa %>%
  inner_join(positions) %>%
  bind_rows(
    mscar_fa %>%
      filter(Construct == "positive_control") %>%
      dplyr::select(insert)
  ) %>%
  mutate(vector_name = case_when(
    is.na(Order) ~ "TDP-REGv2 mScarlet positive control in pTwist backbone",
    TRUE ~ paste0("TDP-REGv2 mScarlet #", Order, " in pTwist backbone")
  )) %>%
  mutate(sequence = paste0(backbones$seq[which(backbones$seq_name == "mscar_upstream")],
                           insert,
                           backbones$seq[which(backbones$seq_name == "mscar_downstream")])) %>%
  dplyr::select(vector_name, sequence)
## Joining with `by = join_by(Construct)`
all_vectors <- bind_rows(all_vectors, mscar_df)

# TDP-REGv2 mScarlet vectors (pAAV)

all_mscar_AAVs <- Sys.glob("small_data_files/vector_sequences/fluorescent AAVs/*")

# One row per AAV file (first FASTA record only), named from the file name.
# Files containing 'b12' are dropped. A file matching none of the patterns
# below would be named "pAAV with NA".
mscar_df_AAV <- map_df(all_mscar_AAVs, function(file) {
  this_seq <- paste(readDNAStringSet(file)[1])

  data.frame(filename = file, sequence = this_seq) %>%
    filter(!str_detect(filename, "b12")) %>%
    mutate(vector_name = paste0(
      "pAAV with ",
      case_when(
        str_detect(filename, "a11") ~ paste0(
          "TDP-REGv2 mScarlet #",
          positions$Order[which(positions$Construct == "A11")]
        ),
        str_detect(filename, "f1") ~ "TDP-REGv1 mCherry",
        str_detect(filename, "control") ~ "mScarlet positive control"
      )
    )) %>%
    # Namespace-qualified like every other select() call in this file, so it
    # cannot be masked by another attached package.
    dplyr::select(vector_name, sequence)
})


# Append the AAV vectors and give every row collected so far the same comment.
all_vectors <- all_vectors %>%
  bind_rows(mscar_df_AAV) %>%
  mutate(comments = "Used for studying TDP-43-mediated regulation of fluorescent protein expression")


# TDP/RAVER vectors (pTwist CMV)

# First record of the constitutive fusion vector, as a plain string.
raver_ptwist_WT <- paste0(
  Biostrings::readDNAStringSet("small_data_files/vector_sequences/RAVER/tdp-43-raver1-only-with-mutations-to-remove-acceptors-in-raver.fasta")[1]
)

# The 2FL variant is made by swapping one 19-nt stretch (first occurrence) for
# a version that differs at two positions.
RV_WT_seq <-  "ATAGCAAAGGGTTCGGATT"
RV_2FL_seq <- "ATAGCAAAGGGCTCGGACT"

raver_ptwist_2FL <- str_replace(raver_ptwist_WT, RV_WT_seq, RV_2FL_seq)

raver_df <- data.frame(
  vector_name = c(
    "Constitutive TDP-43/Raver1 fusion in pTwist CMV",
    "Constitutive TDP-43/Raver1 fusion with 2FL mutation in pTwist CMV"
  ),
  sequence = c(raver_ptwist_WT, raver_ptwist_2FL),
  comments = "Used for showing rescue of mScarlet reporters when co-transfected"
)

# Lookup table mapping each ordered plasmid to its construct number (1-10, in
# the order listed) and to a well code without zero padding ("B03" -> "B3")
ordered_names <- local({
  plasmid_labels <- c("Plasmid B03", "Plasmid B02",
                      "Plasmid B04", "Plasmid B07",
                      "Plasmid B08", "Plasmid B05",
                      "Plasmid B06", "Plasmid B09",
                      "Plasmid B11", "Plasmid B10")

  # Second space-separated token of each label, e.g. "B03"
  well_ids <- vapply(strsplit(plasmid_labels, " ", fixed = TRUE),
                     function(tokens) tokens[2], character(1))

  data.frame(
    plasmid = plasmid_labels,
    construct_number = factor(1:10),
    # Row letter followed by the column number with leading zeros removed
    code = paste0(substr(well_ids, 1, 1), as.numeric(substr(well_ids, 2, 3)))
  )
})

# Assemble the full vector sequence for each ordered eBlock: the eBlock is
# spliced into the 2FL backbone in place of the region it overlaps.
raver_eblock_df <- read_csv("small_data_files/vector_sequences/RAVER/August 2022 Eblock order - Sheet1.csv") %>%
  dplyr::rename(code = `Well Position`, eblock = Sequence) %>%
  # Keep only wells listed in `ordered_names` (implicit join on `code`)
  inner_join(ordered_names) %>%
  mutate(
    # Splice points are located on the WT backbone using the first and last
    # 10 nt of the eBlock; WT and 2FL backbones have the same length (the
    # 2FL change swaps one 19-nt window for another), so the coordinates
    # carry over to the 2FL backbone
    backbone_trim_upstream = str_locate(raver_ptwist_WT, str_sub(eblock, 1, 10))[, 1] - 1,
    backbone_trim_downstream = str_locate(raver_ptwist_WT, str_sub(eblock, -10, -1))[, 2] + 1,
    vector_name = paste0('TDP-REGv2 TDP-43/Raver1 #', construct_number,  ' with 2FL mutation in pTwist CMV vector'),
    sequence = paste0(str_sub(raver_ptwist_2FL, 1, backbone_trim_upstream),
                      eblock,
                      str_sub(raver_ptwist_2FL, backbone_trim_downstream, -1)),
    comments = 'Used for initial test of TDP-43-encoding CEs by RT-PCR'
  ) %>%
  dplyr::select(vector_name, sequence, comments)
## Rows: 96 Columns: 3
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (3): Well Position, Name, Sequence
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Joining with `by = join_by(code)`
raver_df <- bind_rows(raver_df, raver_eblock_df)

# TDP/RAVER vectors (Piggybac for i3)

pig_up <- backbones$seq[which(backbones$seq_name == 'piggybac_upstream')]
pig_down <- backbones$seq[which(backbones$seq_name == 'piggybac_downstream')]

# Re-clone selected inserts into the Piggybac/EF1A backbone: the constitutive
# WT fusion, TDP-REGv2 constructs #6 and #9, and the mScarlet positive control
raver_ef1a_piggybacs <- raver_df %>%
  filter(vector_name == 'Constitutive TDP-43/Raver1 fusion in pTwist CMV' | 
           str_detect(vector_name, "#6") | str_detect(vector_name, '#9')) %>%
  bind_rows(data.frame(vector_name = 'mScarlet', 
                       sequence = mscar_df$sequence[which(str_detect(mscar_df$vector_name, 'positive control'))])) %>%
  mutate(
    # The insert runs from just after the last 12 nt of the upstream arm to
    # just before the first 10 nt of the downstream arm, as found in the
    # source vector
    trim_insert_start = str_locate(sequence, str_sub(pig_up, -12, -1))[, 2] + 1,
    trim_insert_end = str_locate(sequence, str_sub(pig_down, 1, 10))[, 1] - 1,
    # Rebuild in the Piggybac backbone, then revert any 2FL window to WT
    sequence = str_replace(
      paste0(pig_up, str_sub(sequence, trim_insert_start, trim_insert_end), pig_down),
      RV_2FL_seq, RV_WT_seq
    ),
    vector_name = paste0(
      case_when(str_detect(vector_name, 'Constitutive') ~ 'Constitutive TDP-43/Raver1 fusion in ',
                str_detect(vector_name, '#6') ~ 'TDP-REGv2 TDP-43/Raver1 #6 in ',
                str_detect(vector_name, '#9') ~ 'TDP-REGv2 TDP-43/Raver1 #9 in ',
                str_detect(vector_name, 'mScarlet') ~ 'mScarlet positive control in '),
      'Piggybac vector with EF1A promoter'
    ),
    # Everything except the constitutive fusion gets the extra i3 Neuron note
    comments = paste0('Used for testing rescue of cryptics in SK-N-BE2 cells.',
                      ifelse(str_detect(vector_name, 'onstitutive'), 
                             '',
                             ' Also used for generating i3 Neuron polyclonal lines.'))
  ) %>%
  dplyr::select(vector_name, sequence, comments)

# TODO (original note): "Also need to add piggybac mScarlet". An mScarlet
# positive-control row is already appended above; confirm whether anything is
# still missing.


raver_df <- bind_rows(raver_df, raver_ef1a_piggybacs)

# TET-inducible TDP/RAVER/BFP vectors 

dream_files <- Sys.glob('small_data_files/vector_sequences/RAVER/Dream 3/*.fasta')

# One row per FASTA file (first record only); the name is chosen from a tag
# found in the file path, checked in the order bfp, b5, b11, rv
dream_vectors_df <- map_df(dream_files, function(fasta_path) {
  first_record <- paste(readDNAStringSet(fasta_path)[1])

  # Construct numbers for wells B5 and B11, looked up in `ordered_names`
  b5_number <- ordered_names$construct_number[which(ordered_names$code == 'B5')]
  b11_number <- ordered_names$construct_number[which(ordered_names$code == 'B11')]

  payload <- case_when(
    str_detect(fasta_path, 'bfp') ~ 'TET-inducible BFP vector',
    str_detect(fasta_path, 'b5') ~ paste0('TET-inducible TDP-REGv2 TDP/Raver1 #', b5_number),
    str_detect(fasta_path, 'b11') ~ paste0('TET-inducible TDP-REGv2 TDP/Raver1 #', b11_number),
    str_detect(fasta_path, 'rv') ~ paste0('TET-inducible Constitutive TDP/Raver1')
  )

  data.frame(
    vector_name = paste0(payload, ' with mScarlet TET3G and BlasticidinR'),
    sequence = first_record,
    comments = 'Used for growth competition assay'
  )
})

raver_df <- bind_rows(raver_df, dream_vectors_df)

# TDP/RAVER vector (pAAV)                           

raver_AAV <- data.frame(sequence = Biostrings::readDNAStringSet('small_data_files/vector_sequences/RAVER/paav-raver-b11 (1).fasta')) %>%
  # Discard the row names (presumably the FASTA record names) by moving them
  # into a throwaway column and dropping it
  rownames_to_column('ignore') %>%
  dplyr::select(-ignore) %>%
  mutate(
    # Named after the construct number assigned to well B11
    vector_name = paste0('pAAV TDP-REGv2 TDP/Raver1 #', ordered_names$construct_number[which(ordered_names$code == 'B11')]),
    comments = 'Used for testing in mouse spinal cord'
  )

raver_df <- bind_rows(raver_df, raver_AAV)

# All TDP-43/Raver1 vectors are now collected; add them to the master table
all_vectors <- bind_rows(all_vectors, raver_df)

# 12QN vectors

QN_files <- Sys.glob('small_data_files/vector_sequences/12QN/*.fasta')

# One row per FASTA file (first record only); the name is chosen from a tag
# found in the file path, checked in the order just, 12qn, wt
QN_vectors_df <- map_df(QN_files, function(fasta_path) {
  first_record <- paste(readDNAStringSet(fasta_path)[1])

  # No default branch: an unrecognised path gives an NA vector name
  label <- case_when(
    str_detect(fasta_path, 'just') ~ 'SNAP-only control in pcDNA3.1',
    str_detect(fasta_path, '12qn') ~ '12QN TDP-43 mutant in pcDNA3.1',
    str_detect(fasta_path, 'wt') ~ 'WT TDP-43 in pcDNA3.1'
  )

  data.frame(
    vector_name = label,
    sequence = first_record,
    comments = 'Used for studying activation by TDP-43 aggregation'
  )
})

all_vectors <- bind_rows(all_vectors, QN_vectors_df)

# Cre with 1, 2 or 3 CEs

cre_files <- Sys.glob('small_data_files/vector_sequences/Cre/*.fasta')

# One row per FASTA file (first record only). The number of cryptic exons
# (CEs) is inferred from a tag in the file path: 'triple', 'double', or
# 'without' (the single-CE construct).
cre_vectors_df <- map_df(cre_files, function(file){
  this_seq <- paste(readDNAStringSet(file)[1])
  
  data.frame(filename = file, sequence = this_seq) %>%
    # Labels use consistent singular/plural: "1 CE", "2 CEs", "3 CEs"
    mutate(vector_name = case_when(str_detect(filename, 'triple') ~ 'Cryptic Cre (3 CEs) in pTwist CMV',
                                   str_detect(filename, 'double') ~ 'Cryptic Cre (2 CEs) in pTwist CMV',
                                   str_detect(filename, 'without') ~ 'Cryptic Cre (1 CE) in pTwist CMV')) %>%
    # Qualified like elsewhere in this file, so the call does not depend on
    # which package's select() is on top of the search path
    dplyr::select(vector_name, sequence) %>%
    mutate(comments = 'Used for testing the value of adding extra CEs to a sequence')
})

all_vectors <- bind_rows(all_vectors, cre_vectors_df)

# Prime editing

pe_files <- Sys.glob('small_data_files/vector_sequences/prime editing/*.fasta')

# One row per FASTA file (first record only), named after the file itself
pe_vectors_df <- map_df(pe_files, function(fasta_path) {
  first_record <- paste(readDNAStringSet(fasta_path)[1])

  # Take the text before the first '.', keep its last '/'-separated piece,
  # then strip the '-code-12c' tag
  file_stem <- word(word(fasta_path, 1, sep='\\.'), -1, sep='/')
  label <- str_replace(file_stem, '-code-12c', '')

  data.frame(
    vector_name = label,
    sequence = first_record,
    # Names containing 'flag' get the western-blotting comment
    comments = ifelse(str_detect(label, 'flag'), 
                      'Used for western blotting prime editing vectors',
                      'Used for studying if prime editing can be controlled by TDP-43 function')
  )
})

all_vectors <- bind_rows(all_vectors, pe_vectors_df)

# Luciferase

# Lookup table for the seven Gluc constructs: index (r_bc), the record name
# expected in the reference FASTA, and the display name. Display names for
# the TDP-REGv2 constructs contain a line break ("\n").
gluc_names_df <- data.frame(
  r_bc = 1:7,
  expected_rname = c("design1_Gluc",
                     "Gluc_prepared",
                     "Design2_Gluc_A2",
                     "Design2_Gluc_A3",
                     "Design2_Gluc_A5",
                     "Design2_Gluc_A6",
                     "Design2_Gluc_A7"),
  paper_name = c("TDP-REGv1",
                 "+ve",
                 "TDP-REGv2\n#1",
                 "TDP-REGv2\n#2",
                 "TDP-REGv2\n#3",
                 "TDP-REGv2\n#4",
                 "TDP-REGv2\n#5")
)

# Build the full Gluc vector sequences: each insert from the reference FASTA
# is placed between the Gluc upstream and downstream backbone segments.
gluc_fasta <- data.frame(insert = Biostrings::readDNAStringSet('small_data_files/vector_sequences/Gluc/gluc_fasta.fa')) %>%
  # Row names become the join key matched against `gluc_names_df`
  rownames_to_column('expected_rname') %>%
  inner_join(gluc_names_df) %>%
  # `paper_name` contains "\n" line breaks; replace them with a space so the
  # exported vector names stay on a single line
  mutate(vector_name = paste0(case_when(str_detect(paper_name, 've') ~ 'GLUC luciferase positive control',
                              TRUE ~ paste0('GLUC ', str_replace_all(paper_name, '\n', ' '))),
                              ' in pTwist-CMV')) %>%
  mutate(sequence = paste0(backbones$seq[which(backbones$seq_name == 'gluc_upstream')],
                           insert,
                           backbones$seq[which(backbones$seq_name == 'gluc_downstream')])) %>%
  dplyr::select(vector_name, sequence) %>%
  mutate(comments = 'Used for studying TDP-43-mediated regulation of luciferase expression')
## Joining with `by = join_by(expected_rname)`
all_vectors <- bind_rows(all_vectors, gluc_fasta)

# NEGATIVE CONTROL mCherry HIS FLAG

# Single-row table built from the first record of the tagged mCherry FASTA
his_df <- data.frame(
  sequence = paste(readDNAStringSet('small_data_files/vector_sequences/mCherry/cm1-triflag-7xhis.fasta')[1]),
  vector_name = 'mCherry negative control with HIS-tag and TriFlag-tag',
  comments = 'Used for assessing leaky expression by western blotting'
)

all_vectors <- bind_rows(all_vectors, his_df)

# shRNA vectors

rbps <- c("tdp43", "FUS", "hnRNPA1", "hnRNPK", "hnRNPC")

filenames <- Sys.glob(paste0(data_dir, "/sami/*.csv"))

# For each RNA-binding protein, read its two shRNA score tables (file names
# containing "splash" and "top20"), keep guides present in both, and compute
# a combined score.
full_df <- map_df(rbps, function(rbp){
  is_this_rbp <- str_detect(filenames, rbp)
  splash_filename <- filenames[which(is_this_rbp & str_detect(filenames, "splash"))]
  top20_filename <- filenames[which(is_this_rbp & str_detect(filenames, "top20"))]
  
  # The first three lines of the splash file are skipped before the header
  splash_df <- read_csv(splash_filename, skip = 3) %>%
    dplyr::rename(seq = Antisense.Guide.Sequence)
  
  # `score` is read as text; its last character is dropped before conversion
  # (presumably a trailing unit such as "%"; confirm against the files)
  top20_df <- read_csv(top20_filename) %>%
    mutate(top20_score = as.numeric(str_sub(score, 1, -2))) %>%
    select(seq = shRNAsequence, top20_score)
  
  # NOTE(review): the origin of the 1.488136 weight is not shown here
  inner_join(splash_df, top20_df, by = "seq") %>%
    mutate(combined_score = 1.488136*top20_score + SplashRNA,
           rbp = rbp)
})
## Warning: One or more parsing issues, call `problems()` on your data frame for details,
## e.g.:
##   dat <- vroom(...)
##   problems(dat)
## Rows: 20 Columns: 11
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (6): Feature, ID, shRNA.name, Antisense.Guide.Sequence, 97mer.construct,...
## dbl (3): SplashRNA, Mouse.22mer.match.genes, Human.22mer.match.genes
## lgl (2): Warnings, Mouse.match.entrezIDs
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 20 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): position_mRNA, shRNAsequence, targetsequence, score
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Warning: One or more parsing issues, call `problems()` on your data frame for details,
## e.g.:
##   dat <- vroom(...)
##   problems(dat)
## Rows: 20 Columns: 11
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (6): Feature, ID, shRNA.name, Antisense.Guide.Sequence, 97mer.construct,...
## dbl (3): SplashRNA, Mouse.22mer.match.genes, Human.22mer.match.genes
## lgl (2): Warnings, Mouse.match.entrezIDs
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 20 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): position_mRNA, shRNAsequence, targetsequence, score
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Warning: One or more parsing issues, call `problems()` on your data frame for details,
## e.g.:
##   dat <- vroom(...)
##   problems(dat)
## Rows: 20 Columns: 11
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (7): Feature, ID, shRNA.name, Antisense.Guide.Sequence, 97mer.construct,...
## dbl (3): SplashRNA, Mouse.22mer.match.genes, Human.22mer.match.genes
## lgl (1): Warnings
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 20 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): position_mRNA, shRNAsequence, targetsequence, score
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Warning: One or more parsing issues, call `problems()` on your data frame for details,
## e.g.:
##   dat <- vroom(...)
##   problems(dat)
## Rows: 20 Columns: 11
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (6): Feature, ID, shRNA.name, Antisense.Guide.Sequence, 97mer.construct,...
## dbl (3): SplashRNA, Mouse.22mer.match.genes, Human.22mer.match.genes
## lgl (2): Warnings, Mouse.match.entrezIDs
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 20 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): position_mRNA, shRNAsequence, targetsequence, score
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Warning: One or more parsing issues, call `problems()` on your data frame for details,
## e.g.:
##   dat <- vroom(...)
##   problems(dat)
## Rows: 20 Columns: 11
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (7): Feature, ID, shRNA.name, Antisense.Guide.Sequence, 97mer.construct,...
## dbl (3): SplashRNA, Mouse.22mer.match.genes, Human.22mer.match.genes
## lgl (1): Warnings
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 20 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): position_mRNA, shRNAsequence, targetsequence, score
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
### Get best shRNA sequences

# Top three guides per RBP by combined score. The result stays grouped by
# `rbp`, and slice_max() keeps ties by default, so a group can return more
# than three rows.
best_df <- slice_max(group_by(full_df, rbp), combined_score, n = 3)

# Both scores for every guide, one panel per RBP; colour marks selected guides
ggplot(full_df,
       aes(x = SplashRNA, y = top20_score, size = combined_score, colour = seq %in% best_df$seq)) +
  geom_point() +
  facet_wrap(~rbp)

# Derive cloning oligos from each selected 97-mer construct
best_df2 <- best_df %>%
  mutate(
    # Construct with the first match of each flanking sequence removed
    just_insert = `97mer.construct` %>%
      str_replace("TGCTGTTGACAGTGAGCG", "") %>%
      str_replace("TGCCTACTGCCTCGGA", ""),
    # Construct padded with "at" / "ct" at the ends
    for_gib = paste0("at", `97mer.construct`, "ct"),
    # Forward oligo: last 60 nt; reverse oligo: rc() of the first 60 nt
    forward_oligo = str_sub(for_gib, -60, -1),
    reverse_oligo = rc(str_sub(for_gib, 1, 60))
  )

# One row per selected shRNA, named "sh_<rbp>_<index>", with the 97-mer
# construct placed between the upstream and downstream backbone segments,
# plus the negative-control plasmid.
#
# The oligo columns are not needed here: the earlier version pivoted them to
# long form, appended "_F"/"_R", stripped the suffix again and de-duplicated,
# which yields the same rows as selecting the construct directly and sorting
# by `gib_name`.
shRNA_constructs <- best_df2 %>%
  ungroup() %>%
  group_by(rbp) %>%
  # Number the selected guides within each RBP in their current row order
  mutate(gib_name = paste0(rbp, "_", seq_len(n()))) %>%
  ungroup() %>%
  dplyr::select(gib_name, construct = `97mer.construct`) %>%
  arrange(gib_name) %>%
  distinct() %>%
  mutate(vector_name = paste0("sh_", gib_name), 
         sequence = paste0(backbones$seq[which(backbones$seq_name == 'sami_upstream')],
                           construct,
                           backbones$seq[which(backbones$seq_name == 'sami_downstream')])) %>%
  dplyr::select(vector_name, sequence) %>%
  # The negative control is read straight from its FASTA (first record). No
  # assignment inside the call, so no stray `this_seq` global is created.
  bind_rows(data.frame(vector_name = 'Negative control shRNA plasmid',
                       sequence = paste(readDNAStringSet('small_data_files/vector_sequences/shRNA/piggybac-blasticidin-with-mgl-and-shrna-and-xmai-cut-site.fasta')[1]))) %>%
  mutate(comments = 'Used to test specificity of TDP-REG vectors to TDP-43 knockdown')

all_vectors <- bind_rows(all_vectors, shRNA_constructs)

# BSBS vectors

# Each insert from the reference FASTA is spliced into the Gluc backbone: the
# upstream segment is cut just before the first match of the insert's first
# 10 nt, and the downstream segment just after the match of its last 10 nt.
bsbs <- local({
  gluc_up <- backbones$seq[which(backbones$seq_name == "gluc_upstream")]
  gluc_down <- backbones$seq[which(backbones$seq_name == "gluc_downstream")]

  data.frame(insert = Biostrings::readDNAStringSet('small_data_files/vector_sequences/bsbs/full_reference_fasta.fa')) %>%
    rownames_to_column('original_vector_name') %>%
    mutate(
      # 2nd and 3rd '_'-separated fields of the original name
      attempt = word(original_vector_name, 2, sep='_'),
      optimisation_level = word(original_vector_name, 3, sep="_"),
      vector_name = paste0("SpliceNouveau test attempt ", attempt, " with optimisation level ", optimisation_level),
      trim_gluc_up = str_locate(gluc_up, str_sub(insert, 1, 10))[,1] - 1,
      trim_gluc_down = str_locate(gluc_down, str_sub(insert, -10, -1))[,2] + 1,
      sequence = paste0(str_sub(gluc_up, 1, trim_gluc_up),
                        insert,
                        str_sub(gluc_down, trim_gluc_down, -1)),
      comments = 'Used for testing SpliceNouveau optimisation versus performance'
    ) %>%
    dplyr::select(vector_name, sequence, comments)
})

all_vectors <- bind_rows(all_vectors, bsbs)

# Export the complete table of vectors
write_csv(all_vectors, 'All_vectors_used_in_study.csv')